• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * SPDX-License-Identifier: MIT
9  */
10 
11 #include <fcntl.h>
12 
13 #ifdef MAJOR_IN_SYSMACROS
14 #include <sys/sysmacros.h>
15 #endif
16 
17 #include "vk_log.h"
18 #include "vk_shader_module.h"
19 
20 #include "util/disk_cache.h"
21 #include "util/hex.h"
22 #include "util/u_debug.h"
23 #include "radv_android.h"
24 #include "radv_debug.h"
25 #include "radv_entrypoints.h"
26 #include "radv_instance.h"
27 #include "radv_physical_device.h"
28 #include "radv_pipeline_rt.h"
29 #include "radv_video.h"
30 #include "radv_wsi.h"
31 
32 #ifdef _WIN32
33 typedef void *drmDevicePtr;
34 #include <io.h>
35 #else
36 #include <amdgpu.h>
37 #include "drm-uapi/amdgpu_drm.h"
38 #include "util/os_drm.h"
39 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
40 #endif
41 #include "winsys/null/radv_null_winsys_public.h"
42 #include "git_sha1.h"
43 
44 #if AMD_LLVM_AVAILABLE
45 #include "ac_llvm_util.h"
46 #endif
47 
48 #ifdef _WIN32
49 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
50 #else
51 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
52 #endif
53 
54 static bool
radv_perf_query_supported(const struct radv_physical_device * pdev)55 radv_perf_query_supported(const struct radv_physical_device *pdev)
56 {
57    const struct radv_instance *instance = radv_physical_device_instance(pdev);
58 
59    /* SQTT / SPM interfere with the register states for perf counters, and
60     * the code has only been tested on GFX10.3 */
61    return pdev->info.gfx_level == GFX10_3 && !(instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
62 }
63 
64 static bool
radv_taskmesh_enabled(const struct radv_physical_device * pdev)65 radv_taskmesh_enabled(const struct radv_physical_device *pdev)
66 {
67    const struct radv_instance *instance = radv_physical_device_instance(pdev);
68 
69    if (instance->debug_flags & RADV_DEBUG_NO_MESH_SHADER)
70       return false;
71 
72    return pdev->use_ngg && !pdev->use_llvm && pdev->info.gfx_level >= GFX10_3 &&
73           !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdev->info.has_gang_submit;
74 }
75 
76 static bool
radv_transfer_queue_enabled(const struct radv_physical_device * pdev)77 radv_transfer_queue_enabled(const struct radv_physical_device *pdev)
78 {
79    const struct radv_instance *instance = radv_physical_device_instance(pdev);
80 
81    /* Check if the GPU has SDMA support and transfer queues are allowed. */
82    if (pdev->info.sdma_ip_version == SDMA_UNKNOWN || !pdev->info.ip[AMD_IP_SDMA].num_queues ||
83        !(instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE))
84       return false;
85 
86    return pdev->info.gfx_level >= GFX9;
87 }
88 
89 static bool
radv_vrs_attachment_enabled(const struct radv_physical_device * pdev)90 radv_vrs_attachment_enabled(const struct radv_physical_device *pdev)
91 {
92    const struct radv_instance *instance = radv_physical_device_instance(pdev);
93    return pdev->info.gfx_level >= GFX11 || !(instance->debug_flags & RADV_DEBUG_NO_HIZ);
94 }
95 
96 static bool
radv_calibrated_timestamps_enabled(const struct radv_physical_device * pdev)97 radv_calibrated_timestamps_enabled(const struct radv_physical_device *pdev)
98 {
99    return RADV_SUPPORT_CALIBRATED_TIMESTAMPS && !(pdev->info.family == CHIP_RAVEN || pdev->info.family == CHIP_RAVEN2);
100 }
101 
102 static bool
radv_filter_minmax_enabled(const struct radv_physical_device * pdev)103 radv_filter_minmax_enabled(const struct radv_physical_device *pdev)
104 {
105    /* Tahiti and Verde only: reduction mode is unsupported due to a bug
106     * (it might work sometimes, but that's not enough)
107     */
108    return !(pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_VERDE);
109 }
110 
111 static bool
radv_cooperative_matrix_enabled(const struct radv_physical_device * pdev)112 radv_cooperative_matrix_enabled(const struct radv_physical_device *pdev)
113 {
114    if (pdev->info.gfx_level == GFX12)
115       return false; /* TODO */
116 
117    return pdev->info.gfx_level >= GFX11 && !pdev->use_llvm;
118 }
119 
120 bool
radv_enable_rt(const struct radv_physical_device * pdev)121 radv_enable_rt(const struct radv_physical_device *pdev)
122 {
123    if (!pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
124       return false;
125 
126    if (pdev->use_llvm)
127       return false;
128 
129    return true;
130 }
131 
132 bool
radv_emulate_rt(const struct radv_physical_device * pdev)133 radv_emulate_rt(const struct radv_physical_device *pdev)
134 {
135    const struct radv_instance *instance = radv_physical_device_instance(pdev);
136    return instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
137 }
138 
139 static VkConformanceVersion
radv_get_conformance_version(const struct radv_physical_device * pdev)140 radv_get_conformance_version(const struct radv_physical_device *pdev)
141 {
142    VkConformanceVersion conformance_version = {0}; /* Non-conformant by default */
143 
144    if (pdev->info.gfx_level >= GFX8 && pdev->info.gfx_level <= GFX11_5) {
145       switch (pdev->info.family) {
146       /* GFX8 */
147       case CHIP_TONGA:
148       case CHIP_FIJI:
149       case CHIP_POLARIS10:
150       /* GFX9 */
151       case CHIP_VEGA10:
152       case CHIP_RENOIR:
153       /* GFX10 */
154       case CHIP_NAVI14:
155       /* GFX10.3 */
156       case CHIP_NAVI21:
157       case CHIP_NAVI22:
158       case CHIP_VANGOGH:
159       /* GFX11 */
160       case CHIP_NAVI31:
161       /* GFX11.5 */
162       case CHIP_GFX1150:
163          conformance_version = (VkConformanceVersion){
164             .major = 1,
165             .minor = 4,
166             .subminor = 0,
167             .patch = 0,
168          };
169          break;
170       default:
171          break;
172       }
173    } else {
174       /* GFX6-7 */
175       switch (pdev->info.family) {
176       case CHIP_TAHITI:
177       case CHIP_PITCAIRN:
178       case CHIP_VERDE:
179       case CHIP_OLAND:
180       case CHIP_BONAIRE:
181       case CHIP_HAWAII:
182          conformance_version = (VkConformanceVersion){
183             .major = 1,
184             .minor = 3,
185             .subminor = 9,
186             .patch = 2,
187          };
188          break;
189       default:
190          break;
191       }
192    }
193 
194    return conformance_version;
195 }
196 
/* Decode 2 * length hexadecimal characters from `in` into `length` bytes at
 * `out`. The first character of each pair becomes the high nibble.
 *
 * Both upper- and lower-case digits are accepted. The input is not validated
 * and is not required to be NUL-terminated within the decoded range.
 */
static void
parse_hex(char *out, const char *in, unsigned length)
{
   for (unsigned byte = 0; byte < length; ++byte) {
      unsigned packed = 0;

      for (unsigned half = 0; half < 2; ++half) {
         const char c = in[2 * byte + half];
         unsigned nibble;

         if (c <= '9')
            nibble = c - '0';
         else if (c >= 'a')
            nibble = c - 'a' + 10;
         else
            nibble = c - 'A' + 10;

         /* half == 0 is the high nibble, half == 1 the low one. */
         packed |= nibble << (4 * (1 - half));
      }

      out[byte] = packed;
   }
}
208 
209 static void
radv_physical_device_init_cache_key(struct radv_physical_device * pdev)210 radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
211 {
212    const struct radv_instance *instance = radv_physical_device_instance(pdev);
213    struct radv_physical_device_cache_key *key = &pdev->cache_key;
214 
215    key->family = pdev->info.family;
216    key->ptr_size = sizeof(void *);
217    key->conformant_trunc_coord = pdev->info.conformant_trunc_coord;
218 
219    key->clear_lds = instance->drirc.clear_lds;
220    key->cs_wave32 = pdev->cs_wave_size == 32;
221    key->disable_aniso_single_level = instance->drirc.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
222    key->disable_shrink_image_store = instance->drirc.disable_shrink_image_store;
223    key->disable_sinking_load_input_fs = instance->drirc.disable_sinking_load_input_fs;
224    key->emulate_rt = !!(instance->perftest_flags & RADV_PERFTEST_EMULATE_RT);
225    key->ge_wave32 = pdev->ge_wave_size == 32;
226    key->invariant_geom = !!(instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
227    key->no_fmask = !!(instance->debug_flags & RADV_DEBUG_NO_FMASK);
228    key->no_ngg_gs = !!(instance->debug_flags & RADV_DEBUG_NO_NGG_GS);
229    key->no_rt = !!(instance->debug_flags & RADV_DEBUG_NO_RT);
230    key->ps_wave32 = pdev->ps_wave_size == 32;
231    key->rt_wave64 = pdev->rt_wave_size == 64;
232    key->split_fma = !!(instance->debug_flags & RADV_DEBUG_SPLIT_FMA);
233    key->ssbo_non_uniform = instance->drirc.ssbo_non_uniform;
234    key->tex_non_uniform = instance->drirc.tex_non_uniform;
235    key->lower_terminate_to_discard = instance->drirc.lower_terminate_to_discard;
236    key->use_llvm = pdev->use_llvm;
237    key->use_ngg = pdev->use_ngg;
238    key->use_ngg_culling = pdev->use_ngg_culling;
239 }
240 
241 static int
radv_device_get_cache_uuid(struct radv_physical_device * pdev,void * uuid)242 radv_device_get_cache_uuid(struct radv_physical_device *pdev, void *uuid)
243 {
244    struct mesa_sha1 ctx;
245    unsigned char sha1[20];
246 
247    memset(uuid, 0, VK_UUID_SIZE);
248    _mesa_sha1_init(&ctx);
249 
250 #ifdef RADV_BUILD_ID_OVERRIDE
251    {
252       unsigned size = strlen(RADV_BUILD_ID_OVERRIDE) / 2;
253       char *data = alloca(size);
254       parse_hex(data, RADV_BUILD_ID_OVERRIDE, size);
255       _mesa_sha1_update(&ctx, data, size);
256    }
257 #else
258    if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
259       return -1;
260 #endif
261 
262 #if AMD_LLVM_AVAILABLE
263    if (pdev->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
264       return -1;
265 #endif
266 
267    _mesa_sha1_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key));
268    _mesa_sha1_final(&ctx, sha1);
269 
270    memcpy(uuid, sha1, VK_UUID_SIZE);
271    return 0;
272 }
273 
/* Fill `uuid` with the driver UUID by forwarding to ac_compute_driver_uuid(),
 * passing VK_UUID_SIZE as the size.
 */
static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
279 
/* Fill `uuid` with the device UUID for `gpu_info` by forwarding to
 * ac_compute_device_uuid(), passing VK_UUID_SIZE as the size.
 */
static void
radv_get_device_uuid(const struct radeon_info *gpu_info, void *uuid)
{
   ac_compute_device_uuid(gpu_info, uuid, VK_UUID_SIZE);
}
285 
286 static void
radv_physical_device_init_queue_table(struct radv_physical_device * pdev)287 radv_physical_device_init_queue_table(struct radv_physical_device *pdev)
288 {
289    const struct radv_instance *instance = radv_physical_device_instance(pdev);
290    int idx = 0;
291    pdev->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
292    idx++;
293 
294    for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
295       pdev->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;
296 
297    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
298       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
299       idx++;
300    }
301 
302    if (pdev->video_decode_enabled) {
303       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
304          pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_DEC;
305          idx++;
306       }
307    }
308 
309    if (radv_transfer_queue_enabled(pdev)) {
310       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
311       idx++;
312    }
313 
314    if (pdev->video_encode_enabled) {
315       if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
316          pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_ENC;
317          idx++;
318       }
319    }
320 
321    if (radv_sparse_queue_enabled(pdev)) {
322       pdev->vk_queue_to_radv[idx] = RADV_QUEUE_SPARSE;
323       idx++;
324    }
325 
326    pdev->num_queues = idx;
327 }
328 
/* Bit flags naming the memory heaps a physical device exposes (OR-ed into
 * pdev->heaps). RADV_HEAP_MAX is the first bit past the valid flags.
 */
enum radv_heap {
   RADV_HEAP_VRAM = 0x1,
   RADV_HEAP_GTT = 0x2,
   RADV_HEAP_VRAM_VIS = 0x4,
   RADV_HEAP_MAX = 0x8,
};
335 
336 static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device * pdev)337 radv_get_adjusted_vram_size(struct radv_physical_device *pdev)
338 {
339    const struct radv_instance *instance = radv_physical_device_instance(pdev);
340    int ov = instance->drirc.override_vram_size;
341    if (ov >= 0)
342       return MIN2((uint64_t)pdev->info.vram_size_kb * 1024, (uint64_t)ov << 20);
343    return (uint64_t)pdev->info.vram_size_kb * 1024;
344 }
345 
346 static uint64_t
radv_get_visible_vram_size(struct radv_physical_device * pdev)347 radv_get_visible_vram_size(struct radv_physical_device *pdev)
348 {
349    return MIN2(radv_get_adjusted_vram_size(pdev), (uint64_t)pdev->info.vram_vis_size_kb * 1024);
350 }
351 
352 static uint64_t
radv_get_vram_size(struct radv_physical_device * pdev)353 radv_get_vram_size(struct radv_physical_device *pdev)
354 {
355    uint64_t total_size = radv_get_adjusted_vram_size(pdev);
356    return total_size - MIN2(total_size, (uint64_t)pdev->info.vram_vis_size_kb * 1024);
357 }
358 
/* Build the memory heap and memory type tables for pdev.
 *
 * Fills pdev->memory_properties (heaps and types), the pdev->heaps bitmask,
 * the per-type pdev->memory_domains / pdev->memory_flags arrays (indexed like
 * memoryTypes), and the pdev->memory_types_32bit mask.
 *
 * Heaps are created in the order: non-visible VRAM, GTT, visible VRAM. Each is
 * only created when its size passes the check below, so heap indices depend on
 * which earlier heaps exist.
 */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   uint64_t visible_vram_size = radv_get_visible_vram_size(pdev);
   uint64_t vram_size = radv_get_vram_size(pdev);
   uint64_t gtt_size = (uint64_t)pdev->info.gart_size_kb * 1024;
   /* -1 means the corresponding heap is not exposed. */
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   pdev->memory_properties.memoryHeapCount = 0;
   pdev->heaps = 0;

   /* Without dedicated VRAM, visible VRAM and GTT are pooled and re-split,
    * and no non-visible VRAM heap is reported.
    */
   if (!pdev->info.has_dedicated_vram) {
      const uint64_t total_size = gtt_size + visible_vram_size;

      if (instance->drirc.enable_unified_heap_on_apu) {
         /* Some applications seem better when the driver exposes only one heap of VRAM on APUs. */
         visible_vram_size = total_size;
         gtt_size = 0;
      } else {
         /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
          * greater than it. To workaround this, we compute the total available memory size (GTT +
          * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
          */
         visible_vram_size = align64((total_size * 2) / 3, pdev->info.gart_page_size);
         gtt_size = total_size - visible_vram_size;
      }

      vram_size = 0;
   }

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM;
      pdev->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_GTT;
      pdev->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM_VIS;
      pdev->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   /* Device-local types without CPU access, plus a 32-bit variant. They use
    * the non-visible VRAM heap when it exists, otherwise the visible one.
    */
   if (vram_index >= 0 || visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   /* Write-combined GTT: host-visible and coherent, not host-cached. */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   /* CPU-accessible VRAM: device-local, host-visible and coherent, plus a
    * 32-bit variant.
    */
   if (visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   /* Host-cached GTT (no write-combining flag), plus a 32-bit variant. */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   pdev->memory_properties.memoryTypeCount = type_count;

   /* With L2-uncached support, append a device-coherent/uncached copy of each
    * eligible base type. The loop bound is the memoryTypeCount stored just
    * above and is only updated after the loop, so the appended copies are not
    * themselves revisited.
    */
   if (pdev->info.has_l2_uncached) {
      for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = pdev->memory_properties.memoryTypes[i];

         /* Eligible: any host-visible/coherent type, or a type whose flags are
          * exactly DEVICE_LOCAL; 32-bit variants are skipped.
          */
         if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
              mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
             !(pdev->memory_flags[i] & RADEON_FLAG_32BIT)) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            pdev->memory_domains[type_count] = pdev->memory_domains[i];
            pdev->memory_flags[type_count] = pdev->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      pdev->memory_properties.memoryTypeCount = type_count;
   }

   /* Record which memory type indices are 32-bit variants.
    * NOTE(review): the mask is only OR-ed into here, never reset; presumably
    * it starts out zeroed - confirm against the caller.
    */
   for (unsigned i = 0; i < type_count; ++i) {
      if (pdev->memory_flags[i] & RADEON_FLAG_32BIT)
         pdev->memory_types_32bit |= BITFIELD_BIT(i);
   }
}
509 
510 uint32_t
radv_find_memory_index(const struct radv_physical_device * pdev,VkMemoryPropertyFlags flags)511 radv_find_memory_index(const struct radv_physical_device *pdev, VkMemoryPropertyFlags flags)
512 {
513    const VkPhysicalDeviceMemoryProperties *mem_properties = &pdev->memory_properties;
514    for (uint32_t i = 0; i < mem_properties->memoryTypeCount; ++i) {
515       if (mem_properties->memoryTypes[i].propertyFlags == flags) {
516          return i;
517       }
518    }
519    unreachable("invalid memory properties");
520 }
521 
522 static void
radv_get_binning_settings(const struct radv_physical_device * pdev,struct radv_binning_settings * settings)523 radv_get_binning_settings(const struct radv_physical_device *pdev, struct radv_binning_settings *settings)
524 {
525    if ((pdev->info.has_dedicated_vram && pdev->info.max_render_backends > 4) || pdev->info.gfx_level >= GFX10) {
526       /* Using higher settings on GFX10+ can cause random GPU hangs. */
527       settings->context_states_per_bin = 1;
528       settings->persistent_states_per_bin = 1;
529    } else {
530       settings->context_states_per_bin = pdev->info.has_gfx9_scissor_bug ? 1 : 3;
531       settings->persistent_states_per_bin = 1;
532    }
533 
534    settings->fpovs_per_batch = 63;
535 }
536 
/* Fill `out_ext` with the table of device extensions supported by pdev.
 *
 * Each entry is either unconditionally true, gated at compile time by a
 * platform macro, or computed from the device info, the backend/feature
 * selection stored on pdev, or instance debug/drirc settings. Extensions not
 * listed here are left false by the designated initializer.
 */
static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *pdev,
                                              struct vk_device_extension_table *out_ext)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const struct vk_device_extension_table ext = {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(pdev),
      .KHR_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .KHR_compute_shader_derivatives = true,
      .KHR_cooperative_matrix = radv_cooperative_matrix_enabled(pdev),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_clamp_zero_one = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shader_barycentric = pdev->info.gfx_level >= GFX10_3,
      .KHR_fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_maintenance8 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query = radv_perf_query_supported(pdev),
      .KHR_pipeline_binary = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !pdev->use_llvm,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_present_wait =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(pdev),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(pdev),
      .KHR_ray_tracing_pipeline = radv_enable_rt(pdev),
      .KHR_ray_tracing_position_fetch = radv_enable_rt(pdev),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      /* Video extensions follow the decode/encode enable bits on pdev; the
       * per-codec ones additionally depend on the VIDEO_CODEC_* build macros.
       */
      .KHR_video_maintenance1 = pdev->video_decode_enabled || pdev->video_encode_enabled,
      .KHR_video_queue = pdev->video_decode_enabled || pdev->video_encode_enabled,
      .KHR_video_decode_av1 = (pdev->info.vcn_ip_version >= VCN_3_0_0 && pdev->info.vcn_ip_version != VCN_3_0_33 &&
                               VIDEO_CODEC_AV1DEC && pdev->video_decode_enabled),
      .KHR_video_decode_queue = pdev->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && pdev->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && pdev->video_decode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && pdev->video_encode_enabled,
      .KHR_video_encode_h265 = VIDEO_CODEC_H265ENC && pdev->video_encode_enabled,
      .KHR_video_encode_queue = pdev->video_encode_enabled,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = pdev->info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = pdev->info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      /* Only exposed while RGP tracing is enabled on the instance. */
      .EXT_debug_marker = instance->vk.trace_mode & RADV_TRACE_MODE_RGP,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clamp_control = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
      .EXT_device_fault = pdev->info.has_gpuvm_fault_query,
      .EXT_device_generated_commands = pdev->info.gfx_level >= GFX8,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_acquire_unmodified = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = pdev->info.has_userptr,
      .EXT_fragment_shader_interlock = radv_has_pops(pdev),
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_GPL),
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_compression_control = true,
      .EXT_image_drm_format_modifier = pdev->info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = pdev->info.gfx_level >= GFX10,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_vertex_attributes = !pdev->use_llvm,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_mesh_shader = radv_taskmesh_enabled(pdev),
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true, /* Trivial promotion from VALVE. */
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = radv_enable_rt(pdev),
      .EXT_pipeline_robustness = !pdev->use_llvm,
      .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = pdev->info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = radv_filter_minmax_enabled(pdev),
      .EXT_scalar_block_layout = pdev->info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_ESO),
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef RADV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !pdev->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = pdev->info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_early_and_late_fragment_tests = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = pdev->use_fmask,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = pdev->info.gfx_level < GFX11,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .MESA_image_alignment_control = pdev->info.gfx_level >= GFX9,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
   /* Copy the fully built table out in one assignment. */
   *out_ext = ext;
}
783 
784 static void
radv_physical_device_get_features(const struct radv_physical_device * pdev,struct vk_features * features)785 radv_physical_device_get_features(const struct radv_physical_device *pdev, struct vk_features *features)
786 {
787    const struct radv_instance *instance = radv_physical_device_instance(pdev);
788    bool taskmesh_en = radv_taskmesh_enabled(pdev);
789    bool has_perf_query = radv_perf_query_supported(pdev);
790    bool has_shader_image_float_minmax = pdev->info.gfx_level != GFX8 && pdev->info.gfx_level != GFX9 &&
791                                         pdev->info.gfx_level != GFX11 && pdev->info.gfx_level != GFX11_5;
792    bool has_fragment_shader_interlock = radv_has_pops(pdev);
793 
794    *features = (struct vk_features){
795       /* Vulkan 1.0 */
796       .robustBufferAccess = true,
797       .fullDrawIndexUint32 = true,
798       .imageCubeArray = true,
799       .independentBlend = true,
800       .geometryShader = true,
801       .tessellationShader = true,
802       .sampleRateShading = true,
803       .dualSrcBlend = true,
804       .logicOp = true,
805       .multiDrawIndirect = true,
806       .drawIndirectFirstInstance = true,
807       .depthClamp = true,
808       .depthBiasClamp = true,
809       .fillModeNonSolid = true,
810       .depthBounds = true,
811       .wideLines = true,
812       .largePoints = true,
813       .alphaToOne = true,
814       .multiViewport = true,
815       .samplerAnisotropy = true,
816       .textureCompressionETC2 = pdev->info.has_etc_support || pdev->emulate_etc2,
817       .textureCompressionASTC_LDR = pdev->emulate_astc,
818       .textureCompressionBC = true,
819       .occlusionQueryPrecise = true,
820       .pipelineStatisticsQuery = true,
821       .vertexPipelineStoresAndAtomics = true,
822       .fragmentStoresAndAtomics = true,
823       .shaderTessellationAndGeometryPointSize = true,
824       .shaderImageGatherExtended = true,
825       .shaderStorageImageExtendedFormats = true,
826       .shaderStorageImageMultisample = true,
827       .shaderUniformBufferArrayDynamicIndexing = true,
828       .shaderSampledImageArrayDynamicIndexing = true,
829       .shaderStorageBufferArrayDynamicIndexing = true,
830       .shaderStorageImageArrayDynamicIndexing = true,
831       .shaderStorageImageReadWithoutFormat = true,
832       .shaderStorageImageWriteWithoutFormat = true,
833       .shaderClipDistance = true,
834       .shaderCullDistance = true,
835       .shaderFloat64 = true,
836       .shaderInt64 = true,
837       .shaderInt16 = true,
838       .sparseBinding = true,
839       .sparseResidencyBuffer = pdev->info.family >= CHIP_POLARIS10,
840       .sparseResidencyImage2D = pdev->info.family >= CHIP_POLARIS10,
841       .sparseResidencyImage3D = pdev->info.family >= CHIP_POLARIS10,
842       .sparseResidencyAliased = pdev->info.family >= CHIP_POLARIS10,
843       .variableMultisampleRate = true,
844       .shaderResourceMinLod = true,
845       .shaderResourceResidency = true,
846       .inheritedQueries = true,
847 
848       /* Vulkan 1.1 */
849       .storageBuffer16BitAccess = true,
850       .uniformAndStorageBuffer16BitAccess = true,
851       .storagePushConstant16 = true,
852       .storageInputOutput16 = pdev->info.has_packed_math_16bit,
853       .multiview = true,
854       .multiviewGeometryShader = true,
855       .multiviewTessellationShader = true,
856       .variablePointersStorageBuffer = true,
857       .variablePointers = true,
858       .protectedMemory = false,
859       .samplerYcbcrConversion = true,
860       .shaderDrawParameters = true,
861 
862       /* Vulkan 1.2 */
863       .samplerMirrorClampToEdge = true,
864       .drawIndirectCount = true,
865       .storageBuffer8BitAccess = true,
866       .uniformAndStorageBuffer8BitAccess = true,
867       .storagePushConstant8 = true,
868       .shaderBufferInt64Atomics = true,
869       .shaderSharedInt64Atomics = true,
870       .shaderFloat16 = pdev->info.has_packed_math_16bit,
871       .shaderInt8 = true,
872 
873       .descriptorIndexing = true,
874       .shaderInputAttachmentArrayDynamicIndexing = true,
875       .shaderUniformTexelBufferArrayDynamicIndexing = true,
876       .shaderStorageTexelBufferArrayDynamicIndexing = true,
877       .shaderUniformBufferArrayNonUniformIndexing = true,
878       .shaderSampledImageArrayNonUniformIndexing = true,
879       .shaderStorageBufferArrayNonUniformIndexing = true,
880       .shaderStorageImageArrayNonUniformIndexing = true,
881       .shaderInputAttachmentArrayNonUniformIndexing = true,
882       .shaderUniformTexelBufferArrayNonUniformIndexing = true,
883       .shaderStorageTexelBufferArrayNonUniformIndexing = true,
884       .descriptorBindingUniformBufferUpdateAfterBind = true,
885       .descriptorBindingSampledImageUpdateAfterBind = true,
886       .descriptorBindingStorageImageUpdateAfterBind = true,
887       .descriptorBindingStorageBufferUpdateAfterBind = true,
888       .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
889       .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
890       .descriptorBindingUpdateUnusedWhilePending = true,
891       .descriptorBindingPartiallyBound = true,
892       .descriptorBindingVariableDescriptorCount = true,
893       .runtimeDescriptorArray = true,
894 
895       .samplerFilterMinmax = true,
896       .scalarBlockLayout = pdev->info.gfx_level >= GFX7,
897       .imagelessFramebuffer = true,
898       .uniformBufferStandardLayout = true,
899       .shaderSubgroupExtendedTypes = true,
900       .separateDepthStencilLayouts = true,
901       .hostQueryReset = true,
902       .timelineSemaphore = true,
903       .bufferDeviceAddress = true,
904       .bufferDeviceAddressCaptureReplay = true,
905       .bufferDeviceAddressMultiDevice = false,
906       .vulkanMemoryModel = true,
907       .vulkanMemoryModelDeviceScope = true,
908       .vulkanMemoryModelAvailabilityVisibilityChains = false,
909       .shaderOutputViewportIndex = true,
910       .shaderOutputLayer = true,
911       .subgroupBroadcastDynamicId = true,
912 
913       /* Vulkan 1.3 */
914       .robustImageAccess = true,
915       .inlineUniformBlock = true,
916       .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
917       .pipelineCreationCacheControl = true,
918       .privateData = true,
919       .shaderDemoteToHelperInvocation = true,
920       .shaderTerminateInvocation = true,
921       .subgroupSizeControl = true,
922       .computeFullSubgroups = true,
923       .synchronization2 = true,
924       .textureCompressionASTC_HDR = false,
925       .shaderZeroInitializeWorkgroupMemory = true,
926       .dynamicRendering = true,
927       .shaderIntegerDotProduct = true,
928       .maintenance4 = true,
929 
930       /* Vulkan 1.4 */
931       .globalPriorityQuery = true,
932       .shaderSubgroupRotate = true,
933       .shaderSubgroupRotateClustered = true,
934       .shaderFloatControls2 = true,
935       .shaderExpectAssume = true,
936       .rectangularLines = true,
937       .bresenhamLines = true,
938       .smoothLines = true,
939       .stippledRectangularLines = false,
940       .stippledBresenhamLines = true,
941       .stippledSmoothLines = false,
942       .vertexAttributeInstanceRateDivisor = true,
943       .vertexAttributeInstanceRateZeroDivisor = true,
944       .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
945       .dynamicRenderingLocalRead = true,
946       .maintenance5 = true,
947       .maintenance6 = true,
948       .pipelineProtectedAccess = false,
949       .pipelineRobustness = true,
950       .hostImageCopy = false,
951       .pushDescriptor = true,
952 
953       /* VK_EXT_conditional_rendering */
954       .conditionalRendering = true,
955       .inheritedConditionalRendering = false,
956 
957       /* VK_KHR_vertex_attribute_divisor */
958       .vertexAttributeInstanceRateDivisor = true,
959       .vertexAttributeInstanceRateZeroDivisor = true,
960 
961       /* VK_EXT_transform_feedback */
962       .transformFeedback = true,
963       .geometryStreams = true,
964 
965       /* VK_EXT_memory_priority */
966       .memoryPriority = true,
967 
968       /* VK_EXT_depth_clip_enable */
969       .depthClipEnable = true,
970 
971       /* VK_KHR_compute_shader_derivatives */
972       .computeDerivativeGroupQuads = false,
973       .computeDerivativeGroupLinear = true,
974 
975       /* VK_EXT_ycbcr_image_arrays */
976       .ycbcrImageArrays = true,
977 
978       /* VK_KHR_index_type_uint8 */
979       .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
980 
981       /* VK_KHR_pipeline_executable_properties */
982       .pipelineExecutableInfo = true,
983 
984       /* VK_KHR_shader_clock */
985       .shaderSubgroupClock = true,
986       .shaderDeviceClock = pdev->info.gfx_level >= GFX8,
987 
988       /* VK_EXT_texel_buffer_alignment */
989       .texelBufferAlignment = true,
990 
991       /* VK_AMD_device_coherent_memory */
992       .deviceCoherentMemory = pdev->info.has_l2_uncached,
993 
994       /* VK_KHR_line_rasterization */
995       .rectangularLines = true,
996       .bresenhamLines = true,
997       .smoothLines = true,
998       .stippledRectangularLines = false,
999       .stippledBresenhamLines = true,
1000       .stippledSmoothLines = false,
1001 
1002       /* VK_EXT_robustness2 */
1003       .robustBufferAccess2 = true,
1004       .robustImageAccess2 = true,
1005       .nullDescriptor = true,
1006 
1007       /* VK_EXT_custom_border_color */
1008       .customBorderColors = true,
1009       .customBorderColorWithoutFormat = true,
1010 
1011       /* VK_EXT_extended_dynamic_state */
1012       .extendedDynamicState = true,
1013 
1014       /* VK_EXT_shader_atomic_float */
1015       .shaderBufferFloat32Atomics = true,
1016       .shaderBufferFloat32AtomicAdd = pdev->info.gfx_level >= GFX11,
1017       .shaderBufferFloat64Atomics = true,
1018       .shaderBufferFloat64AtomicAdd = false,
1019       .shaderSharedFloat32Atomics = true,
1020       .shaderSharedFloat32AtomicAdd = pdev->info.gfx_level >= GFX8,
1021       .shaderSharedFloat64Atomics = true,
1022       .shaderSharedFloat64AtomicAdd = false,
1023       .shaderImageFloat32Atomics = true,
1024       .shaderImageFloat32AtomicAdd = false,
1025       .sparseImageFloat32Atomics = true,
1026       .sparseImageFloat32AtomicAdd = false,
1027 
1028       /* VK_EXT_4444_formats */
1029       .formatA4R4G4B4 = true,
1030       .formatA4B4G4R4 = true,
1031 
1032       /* VK_EXT_shader_image_atomic_int64 */
1033       .shaderImageInt64Atomics = true,
1034       .sparseImageInt64Atomics = true,
1035 
1036       /* VK_EXT_mutable_descriptor_type */
1037       .mutableDescriptorType = true,
1038 
1039       /* VK_KHR_fragment_shading_rate */
1040       .pipelineFragmentShadingRate = true,
1041       .primitiveFragmentShadingRate = true,
1042       .attachmentFragmentShadingRate = radv_vrs_attachment_enabled(pdev),
1043 
1044       /* VK_KHR_workgroup_memory_explicit_layout */
1045       .workgroupMemoryExplicitLayout = true,
1046       .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
1047       .workgroupMemoryExplicitLayout8BitAccess = true,
1048       .workgroupMemoryExplicitLayout16BitAccess = true,
1049 
1050       /* VK_EXT_provoking_vertex */
1051       .provokingVertexLast = true,
1052       .transformFeedbackPreservesProvokingVertex = true,
1053 
1054       /* VK_EXT_extended_dynamic_state2 */
1055       .extendedDynamicState2 = true,
1056       .extendedDynamicState2LogicOp = true,
1057       .extendedDynamicState2PatchControlPoints = true,
1058 
1059       /* VK_EXT_global_priority_query */
1060       .globalPriorityQuery = true,
1061 
1062       /* VK_KHR_acceleration_structure */
1063       .accelerationStructure = true,
1064       .accelerationStructureCaptureReplay = true,
1065       .accelerationStructureIndirectBuild = false,
1066       .accelerationStructureHostCommands = false,
1067       .descriptorBindingAccelerationStructureUpdateAfterBind = true,
1068 
1069       /* VK_EXT_buffer_device_address */
1070       .bufferDeviceAddressCaptureReplayEXT = false,
1071 
1072       /* VK_KHR_shader_subgroup_uniform_control_flow */
1073       .shaderSubgroupUniformControlFlow = true,
1074 
1075       /* VK_EXT_map_memory_placed */
1076       .memoryMapPlaced = true,
1077       .memoryMapRangePlaced = false,
1078       .memoryUnmapReserve = true,
1079 
1080       /* VK_EXT_multi_draw */
1081       .multiDraw = true,
1082 
1083       /* VK_EXT_color_write_enable */
1084       .colorWriteEnable = true,
1085 
1086       /* VK_EXT_shader_atomic_float2 */
1087       .shaderBufferFloat16Atomics = false,
1088       .shaderBufferFloat16AtomicAdd = false,
1089       .shaderBufferFloat16AtomicMinMax = false,
1090       .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 32),
1091       .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 64),
1092       .shaderSharedFloat16Atomics = false,
1093       .shaderSharedFloat16AtomicAdd = false,
1094       .shaderSharedFloat16AtomicMinMax = false,
1095       .shaderSharedFloat32AtomicMinMax = true,
1096       .shaderSharedFloat64AtomicMinMax = true,
1097       .shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1098       .sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1099 
1100       /* VK_KHR_present_id */
1101       .presentId = pdev->vk.supported_extensions.KHR_present_id,
1102 
1103       /* VK_KHR_present_wait */
1104       .presentWait = pdev->vk.supported_extensions.KHR_present_wait,
1105 
1106       /* VK_EXT_primitive_topology_list_restart */
1107       .primitiveTopologyListRestart = true,
1108       .primitiveTopologyPatchListRestart = false,
1109 
1110       /* VK_KHR_ray_query */
1111       .rayQuery = true,
1112 
1113       /* VK_EXT_pipeline_library_group_handles */
1114       .pipelineLibraryGroupHandles = true,
1115 
1116       /* VK_KHR_ray_tracing_pipeline */
1117       .rayTracingPipeline = true,
1118       .rayTracingPipelineShaderGroupHandleCaptureReplay = true,
1119       .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
1120       .rayTracingPipelineTraceRaysIndirect = true,
1121       .rayTraversalPrimitiveCulling = true,
1122 
1123       /* VK_KHR_ray_tracing_maintenance1 */
1124       .rayTracingMaintenance1 = true,
1125       .rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdev),
1126 
1127       /* VK_KHR_ray_tracing_position_fetch */
1128       .rayTracingPositionFetch = true,
1129 
1130       /* VK_EXT_vertex_input_dynamic_state */
1131       .vertexInputDynamicState = true,
1132 
1133       /* VK_EXT_image_view_min_lod */
1134       .minLod = true,
1135 
1136       /* VK_EXT_mesh_shader */
1137       .meshShader = taskmesh_en,
1138       .taskShader = taskmesh_en,
1139       .multiviewMeshShader = taskmesh_en,
1140       .primitiveFragmentShadingRateMeshShader = taskmesh_en,
1141       .meshShaderQueries = false,
1142 
1143       /* VK_EXT_depth_clip_control */
1144       .depthClipControl = true,
1145 
1146       /* VK_EXT_image_2d_view_of_3d  */
1147       .image2DViewOf3D = true,
1148       .sampler2DViewOf3D = false,
1149 
1150       /* VK_INTEL_shader_integer_functions2 */
1151       .shaderIntegerFunctions2 = true,
1152 
1153       /* VK_EXT_primitives_generated_query */
1154       .primitivesGeneratedQuery = true,
1155       .primitivesGeneratedQueryWithRasterizerDiscard = true,
1156       .primitivesGeneratedQueryWithNonZeroStreams = true,
1157 
1158       /* VK_EXT_non_seamless_cube_map */
1159       .nonSeamlessCubeMap = true,
1160 
1161       /* VK_EXT_border_color_swizzle */
1162       .borderColorSwizzle = true,
1163       .borderColorSwizzleFromImage = true,
1164 
1165       /* VK_EXT_shader_module_identifier */
1166       .shaderModuleIdentifier = true,
1167 
1168       /* VK_KHR_performance_query */
1169       .performanceCounterQueryPools = has_perf_query,
1170       .performanceCounterMultipleQueryPools = has_perf_query,
1171 
1172       /* VK_EXT_attachment_feedback_loop_layout */
1173       .attachmentFeedbackLoopLayout = true,
1174 
1175       /* VK_EXT_graphics_pipeline_library */
1176       .graphicsPipelineLibrary = true,
1177 
1178       /* VK_EXT_extended_dynamic_state3 */
1179       .extendedDynamicState3TessellationDomainOrigin = true,
1180       .extendedDynamicState3PolygonMode = true,
1181       .extendedDynamicState3SampleMask = true,
1182       .extendedDynamicState3AlphaToCoverageEnable = !pdev->use_llvm,
1183       .extendedDynamicState3LogicOpEnable = true,
1184       .extendedDynamicState3LineStippleEnable = true,
1185       .extendedDynamicState3ColorBlendEnable = !pdev->use_llvm,
1186       .extendedDynamicState3DepthClipEnable = true,
1187       .extendedDynamicState3ConservativeRasterizationMode = pdev->info.gfx_level >= GFX9,
1188       .extendedDynamicState3DepthClipNegativeOneToOne = true,
1189       .extendedDynamicState3ProvokingVertexMode = true,
1190       .extendedDynamicState3DepthClampEnable = true,
1191       .extendedDynamicState3ColorWriteMask = !pdev->use_llvm,
1192       .extendedDynamicState3RasterizationSamples = true,
1193       .extendedDynamicState3ColorBlendEquation = !pdev->use_llvm,
1194       .extendedDynamicState3SampleLocationsEnable = pdev->info.gfx_level < GFX10,
1195       .extendedDynamicState3LineRasterizationMode = true,
1196       .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
1197       .extendedDynamicState3AlphaToOneEnable = !pdev->use_llvm,
1198       .extendedDynamicState3RasterizationStream = false,
1199       .extendedDynamicState3ColorBlendAdvanced = false,
1200       .extendedDynamicState3ViewportWScalingEnable = false,
1201       .extendedDynamicState3ViewportSwizzle = false,
1202       .extendedDynamicState3CoverageToColorEnable = false,
1203       .extendedDynamicState3CoverageToColorLocation = false,
1204       .extendedDynamicState3CoverageModulationMode = false,
1205       .extendedDynamicState3CoverageModulationTableEnable = false,
1206       .extendedDynamicState3CoverageModulationTable = false,
1207       .extendedDynamicState3CoverageReductionMode = false,
1208       .extendedDynamicState3RepresentativeFragmentTestEnable = false,
1209       .extendedDynamicState3ShadingRateImageEnable = false,
1210 
1211       /* VK_EXT_descriptor_buffer */
1212       .descriptorBuffer = true,
1213       .descriptorBufferCaptureReplay = true,
1214       .descriptorBufferImageLayoutIgnored = true,
1215       .descriptorBufferPushDescriptors = true,
1216 
1217       /* VK_AMD_shader_early_and_late_fragment_tests */
1218       .shaderEarlyAndLateFragmentTests = true,
1219 
1220       /* VK_EXT_image_sliced_view_of_3d */
1221       .imageSlicedViewOf3D = true,
1222 
1223 #ifdef RADV_USE_WSI_PLATFORM
1224       /* VK_EXT_swapchain_maintenance1 */
1225       .swapchainMaintenance1 = true,
1226 #endif
1227 
1228       /* VK_EXT_attachment_feedback_loop_dynamic_state */
1229       .attachmentFeedbackLoopDynamicState = true,
1230 
1231       /* VK_EXT_dynamic_rendering_unused_attachments */
1232       .dynamicRenderingUnusedAttachments = true,
1233 
1234       /* VK_KHR_fragment_shader_barycentric */
1235       .fragmentShaderBarycentric = true,
1236 
1237       /* VK_EXT_depth_bias_control */
1238       .depthBiasControl = true,
1239       .leastRepresentableValueForceUnormRepresentation = true,
1240       .floatRepresentation = true,
1241       .depthBiasExact = true,
1242 
1243       /* VK_EXT_fragment_shader_interlock */
1244       .fragmentShaderSampleInterlock = has_fragment_shader_interlock,
1245       .fragmentShaderPixelInterlock = has_fragment_shader_interlock,
1246       .fragmentShaderShadingRateInterlock = false,
1247 
1248       /* VK_EXT_pipeline_robustness */
1249       .pipelineRobustness = true,
1250 
1251       /* VK_KHR_maintenance5 */
1252       .maintenance5 = true,
1253 
1254       /* VK_KHR_cooperative_matrix */
1255       .cooperativeMatrix = radv_cooperative_matrix_enabled(pdev),
1256       .cooperativeMatrixRobustBufferAccess = radv_cooperative_matrix_enabled(pdev),
1257 
1258       /* VK_EXT_image_compression_control */
1259       .imageCompressionControl = true,
1260 
1261       /* VK_EXT_device_fault */
1262       .deviceFault = true,
1263       .deviceFaultVendorBinary = instance->debug_flags & RADV_DEBUG_HANG,
1264 
1265       /* VK_KHR_depth_clamp_zero_one */
1266       .depthClampZeroOne = true,
1267 
1268       /* VK_KHR_maintenance6 */
1269       .maintenance6 = true,
1270 
1271       /* VK_KHR_shader_subgroup_rotate */
1272       .shaderSubgroupRotate = true,
1273       .shaderSubgroupRotateClustered = true,
1274 
1275       /* VK_EXT_shader_object */
1276       .shaderObject = true,
1277 
1278       /* VK_KHR_shader_expect_assume */
1279       .shaderExpectAssume = true,
1280 
1281       /* VK_KHR_shader_maximal_reconvergence */
1282       .shaderMaximalReconvergence = true,
1283 
1284       /* VK_KHR_shader_quad_control */
1285       .shaderQuadControl = true,
1286 
1287       /* VK_EXT_address_binding_report */
1288       .reportAddressBinding = true,
1289 
1290       /* VK_EXT_nested_command_buffer */
1291       .nestedCommandBuffer = true,
1292       .nestedCommandBufferRendering = true,
1293       .nestedCommandBufferSimultaneousUse = true,
1294 
1295       /* VK_KHR_dynamic_rendering_local_read */
1296       .dynamicRenderingLocalRead = true,
1297 
1298       /* VK_EXT_legacy_vertex_attributes */
1299       .legacyVertexAttributes = true,
1300 
1301       /* VK_MESA_image_alignment_control */
1302       .imageAlignmentControl = true,
1303 
1304       /* VK_EXT_shader_replicated_composites */
1305       .shaderReplicatedComposites = true,
1306 
1307       /* VK_KHR_maintenance7 */
1308       .maintenance7 = true,
1309 
1310       /* VK_KHR_video_maintenance1 */
1311       .videoMaintenance1 = true,
1312 
1313       /* VK_KHR_pipeline_binary */
1314       .pipelineBinaries = true,
1315 
1316       /* VK_KHR_shader_relaxed_extended_instruction */
1317       .shaderRelaxedExtendedInstruction = true,
1318 
1319       /* VK_KHR_shader_float_controls2 */
1320       .shaderFloatControls2 = true,
1321 
1322       /* VK_EXT_depth_clamp_control */
1323       .depthClampControl = true,
1324 
1325       /* VK_EXT_device_generated_commands */
1326       .deviceGeneratedCommands = true,
1327       .dynamicGeneratedPipelineLayout = true,
1328 
1329       /* VK_KHR_maintenance8 */
1330       .maintenance8 = true,
1331    };
1332 }
1333 
1334 static size_t
radv_max_descriptor_set_size()1335 radv_max_descriptor_set_size()
1336 {
1337    /* make sure that the entire descriptor set is addressable with a signed
1338     * 32-bit int. So the sum of all limits scaled by descriptor size has to
1339     * be at most 2 GiB. the combined image & samples object count as one of
1340     * both. This limit is for the pipeline layout, not for the set layout, but
1341     * there is no set limit, so we just set a pipeline limit. I don't think
1342     * any app is going to hit this soon. */
1343    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1344           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1345            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1346            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
1347 }
1348 
1349 static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device * pdev)1350 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdev)
1351 {
1352    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1353    uint32_t uniform_offset_alignment = instance->drirc.override_uniform_offset_alignment;
1354    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1355       fprintf(stderr,
1356               "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1357               "not a power of two\n",
1358               uniform_offset_alignment);
1359       uniform_offset_alignment = 0;
1360    }
1361 
1362    /* Take at least the hardware limit. */
1363    return MAX2(uniform_offset_alignment, 4);
1364 }
1365 
1366 static const char *
radv_get_compiler_string(struct radv_physical_device * pdev)1367 radv_get_compiler_string(struct radv_physical_device *pdev)
1368 {
1369    const struct radv_instance *instance = radv_physical_device_instance(pdev);
1370 
1371    if (!pdev->use_llvm) {
1372       /* Some games like SotTR apply shader workarounds if the LLVM
1373        * version is too old or if the LLVM version string is
1374        * missing. This gives 2-5% performance with SotTR and ACO.
1375        */
1376       if (instance->drirc.report_llvm9_version_string) {
1377          return " (LLVM 9.0.1)";
1378       }
1379 
1380       return "";
1381    }
1382 
1383 #if AMD_LLVM_AVAILABLE
1384    return " (LLVM " MESA_LLVM_VERSION_STRING ")";
1385 #else
1386    unreachable("LLVM is not available");
1387 #endif
1388 }
1389 
1390 static void
radv_get_physical_device_properties(struct radv_physical_device * pdev)1391 radv_get_physical_device_properties(struct radv_physical_device *pdev)
1392 {
1393    VkSampleCountFlags sample_counts = 0xf;
1394 
1395    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1396 
1397    VkPhysicalDeviceType device_type;
1398    if (pdev->info.has_dedicated_vram) {
1399       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1400    } else {
1401       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1402    }
1403 
1404    bool has_fp16 = pdev->info.has_packed_math_16bit;
1405 
1406    VkShaderStageFlags taskmesh_stages =
1407       radv_taskmesh_enabled(pdev) ? VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT : 0;
1408    VkShaderStageFlags rt_stages = radv_enable_rt(pdev) ? RADV_RT_STAGE_BITS : 0;
1409 
1410    bool accel_dot = pdev->info.has_accelerated_dot_product;
1411    bool gfx11plus = pdev->info.gfx_level >= GFX11;
1412 
1413    VkExtent2D vrs_texel_extent = radv_vrs_attachment_enabled(pdev) ? (VkExtent2D){8, 8} : (VkExtent2D){0, 0};
1414    const int32_t max_viewport_size = pdev->info.gfx_level >= GFX12 ? 32768 : 16384;
1415 
1416    uint64_t os_page_size = 4096;
1417    os_get_page_size(&os_page_size);
1418 
1419    pdev->vk.properties = (struct vk_properties){
1420 #ifdef ANDROID_STRICT
1421       .apiVersion = RADV_API_VERSION,
1422 #else
1423       .apiVersion = pdev->info.gfx_level >= GFX8 ? RADV_API_VERSION : RADV_API_VERSION_1_3,
1424 #endif
1425       .driverVersion = vk_get_driver_version(),
1426       .vendorID = ATI_VENDOR_ID,
1427       .deviceID = pdev->info.pci_id,
1428       .deviceType = device_type,
1429       .maxImageDimension1D = (1 << 14),
1430       .maxImageDimension2D = (1 << 14),
1431       .maxImageDimension3D = (1 << 11),
1432       .maxImageDimensionCube = (1 << 14),
1433       .maxImageArrayLayers = (1 << 11),
1434       .maxTexelBufferElements = UINT32_MAX,
1435       .maxUniformBufferRange = UINT32_MAX,
1436       .maxStorageBufferRange = UINT32_MAX,
1437       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1438       .maxMemoryAllocationCount = UINT32_MAX,
1439       .maxSamplerAllocationCount = 64 * 1024,
1440       .bufferImageGranularity = 1,
1441       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1442       .maxBoundDescriptorSets = MAX_SETS,
1443       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1444       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1445       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1446       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1447       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1448       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1449       .maxPerStageResources = max_descriptor_set_size,
1450       .maxDescriptorSetSamplers = max_descriptor_set_size,
1451       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1452       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1453       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1454       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1455       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1456       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1457       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1458       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1459       .maxVertexInputBindings = MAX_VBS,
1460       .maxVertexInputAttributeOffset = UINT32_MAX,
1461       .maxVertexInputBindingStride = 2048,
1462       .maxVertexOutputComponents = 128,
1463       .maxTessellationGenerationLevel = 64,
1464       .maxTessellationPatchSize = 32,
1465       .maxTessellationControlPerVertexInputComponents = 128,
1466       .maxTessellationControlPerVertexOutputComponents = 128,
1467       .maxTessellationControlPerPatchOutputComponents = 120,
1468       .maxTessellationControlTotalOutputComponents = 4096,
1469       .maxTessellationEvaluationInputComponents = 128,
1470       .maxTessellationEvaluationOutputComponents = 128,
1471       .maxGeometryShaderInvocations = 32,
1472       .maxGeometryInputComponents = 64,
1473       .maxGeometryOutputComponents = 128,
1474       .maxGeometryOutputVertices = 256,
1475       .maxGeometryTotalOutputComponents = 1024,
1476       .maxFragmentInputComponents = 128,
1477       .maxFragmentOutputAttachments = 8,
1478       .maxFragmentDualSrcAttachments = 1,
1479       .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1480       .maxComputeSharedMemorySize = pdev->max_shared_size,
1481       .maxComputeWorkGroupCount = {4294967295, 65535, 65535},
1482       .maxComputeWorkGroupInvocations = 1024,
1483       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1484       .subPixelPrecisionBits = 8,
1485       .subTexelPrecisionBits = 8,
1486       .mipmapPrecisionBits = 8,
1487       .maxDrawIndexedIndexValue = UINT32_MAX,
1488       .maxDrawIndirectCount = UINT32_MAX,
1489       .maxSamplerLodBias = 16,
1490       .maxSamplerAnisotropy = 16,
1491       .maxViewports = MAX_VIEWPORTS,
1492       .maxViewportDimensions = {max_viewport_size, max_viewport_size},
1493       .viewportBoundsRange = {-2 * max_viewport_size, 2 * max_viewport_size - 1},
1494       .viewportSubPixelBits = 8,
1495       .minMemoryMapAlignment = 4096, /* A page */
1496       .minTexelBufferOffsetAlignment = 4,
1497       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdev),
1498       .minStorageBufferOffsetAlignment = 4,
1499       .minTexelOffset = -32,
1500       .maxTexelOffset = 31,
1501       .minTexelGatherOffset = -32,
1502       .maxTexelGatherOffset = 31,
1503       .minInterpolationOffset = -2,
1504       .maxInterpolationOffset = 2,
1505       .subPixelInterpolationOffsetBits = 8,
1506       .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1507       .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1508       .maxFramebufferLayers = (1 << 10),
1509       .framebufferColorSampleCounts = sample_counts,
1510       .framebufferDepthSampleCounts = sample_counts,
1511       .framebufferStencilSampleCounts = sample_counts,
1512       .framebufferNoAttachmentsSampleCounts = sample_counts,
1513       .maxColorAttachments = MAX_RTS,
1514       .sampledImageColorSampleCounts = sample_counts,
1515       .sampledImageIntegerSampleCounts = sample_counts,
1516       .sampledImageDepthSampleCounts = sample_counts,
1517       .sampledImageStencilSampleCounts = sample_counts,
1518       .storageImageSampleCounts = sample_counts,
1519       .maxSampleMaskWords = 1,
1520       .timestampComputeAndGraphics = true,
1521       .timestampPeriod = 1000000.0 / pdev->info.clock_crystal_freq,
1522       .maxClipDistances = 8,
1523       .maxCullDistances = 8,
1524       .maxCombinedClipAndCullDistances = 8,
1525       .discreteQueuePriorities = 2,
1526       .pointSizeRange = {0.0, 8191.875},
1527       .lineWidthRange = {0.0, 8.0},
1528       .pointSizeGranularity = (1.0 / 8.0),
1529       .lineWidthGranularity = (1.0 / 8.0),
1530       .strictLines = false, /* FINISHME */
1531       .standardSampleLocations = true,
1532       .optimalBufferCopyOffsetAlignment = 1,
1533       .optimalBufferCopyRowPitchAlignment = 1,
1534       .nonCoherentAtomSize = 64,
1535       .sparseResidencyNonResidentStrict = pdev->info.family >= CHIP_POLARIS10,
1536       .sparseResidencyStandard2DBlockShape = pdev->info.family >= CHIP_POLARIS10,
1537       .sparseResidencyStandard3DBlockShape = pdev->info.gfx_level >= GFX9,
1538 
1539       /* Vulkan 1.1 */
1540       .driverID = VK_DRIVER_ID_MESA_RADV,
1541       .deviceLUIDValid = false, /* The LUID is for Windows. */
1542       .deviceNodeMask = 0,
1543       .subgroupSize = RADV_SUBGROUP_SIZE,
1544       .subgroupSupportedStages =
1545          VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages | rt_stages,
1546       .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1547                                      VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1548                                      VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1549                                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1550                                      VK_SUBGROUP_FEATURE_ROTATE_BIT | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT,
1551       .subgroupQuadOperationsInAllStages = true,
1552       .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
1553       .maxMultiviewViewCount = MAX_VIEWS,
1554       .maxMultiviewInstanceIndex = INT_MAX,
1555       .protectedNoFault = false,
1556       .maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS,
1557       .maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1558 
1559       /* Vulkan 1.2 */
1560       .conformanceVersion = radv_get_conformance_version(pdev),
1561       /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1562        * controlled by the same config register.
1563        */
1564       .denormBehaviorIndependence =
1565          has_fp16 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
1566       .roundingModeIndependence =
1567          has_fp16 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
1568       /* With LLVM, do not allow both preserving and flushing denorms because
1569        * different shaders in the same pipeline can have different settings and
1570        * this won't work for merged shaders. To make it work, this requires LLVM
1571        * support for changing the register. The same logic applies for the
1572        * rounding modes because they are configured with the same config
1573        * register.
1574        */
1575       .shaderDenormFlushToZeroFloat32 = true,
1576       .shaderDenormPreserveFloat32 = !pdev->use_llvm,
1577       .shaderRoundingModeRTEFloat32 = true,
1578       .shaderRoundingModeRTZFloat32 = !pdev->use_llvm,
1579       .shaderSignedZeroInfNanPreserveFloat32 = true,
1580       .shaderDenormFlushToZeroFloat16 = has_fp16 && !pdev->use_llvm,
1581       .shaderDenormPreserveFloat16 = has_fp16,
1582       .shaderRoundingModeRTEFloat16 = has_fp16,
1583       .shaderRoundingModeRTZFloat16 = has_fp16 && !pdev->use_llvm,
1584       .shaderSignedZeroInfNanPreserveFloat16 = has_fp16,
1585       .shaderDenormFlushToZeroFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm,
1586       .shaderDenormPreserveFloat64 = pdev->info.gfx_level >= GFX8,
1587       .shaderRoundingModeRTEFloat64 = pdev->info.gfx_level >= GFX8,
1588       .shaderRoundingModeRTZFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm,
1589       .shaderSignedZeroInfNanPreserveFloat64 = pdev->info.gfx_level >= GFX8,
1590       .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64,
1591       .shaderUniformBufferArrayNonUniformIndexingNative = false,
1592       .shaderSampledImageArrayNonUniformIndexingNative = false,
1593       .shaderStorageBufferArrayNonUniformIndexingNative = false,
1594       .shaderStorageImageArrayNonUniformIndexingNative = false,
1595       .shaderInputAttachmentArrayNonUniformIndexingNative = false,
1596       .robustBufferAccessUpdateAfterBind = true,
1597       .quadDivergentImplicitLod = false,
1598       .maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size,
1599       .maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size,
1600       .maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size,
1601       .maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size,
1602       .maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size,
1603       .maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size,
1604       .maxPerStageUpdateAfterBindResources = max_descriptor_set_size,
1605       .maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size,
1606       .maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size,
1607       .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1608       .maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size,
1609       .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1610       .maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size,
1611       .maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size,
1612       .maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size,
1613       /* We support all of the depth resolve modes */
1614       .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
1615                                     VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1616       /* Average doesn't make sense for stencil so we don't support that */
1617       .supportedStencilResolveModes =
1618          VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1619       .independentResolveNone = true,
1620       .independentResolve = true,
1621       /* GFX6-8 only support single channel min/max filter. */
1622       .filterMinmaxImageComponentMapping = pdev->info.gfx_level >= GFX9,
1623       .filterMinmaxSingleComponentFormats = true,
1624       .maxTimelineSemaphoreValueDifference = UINT64_MAX,
1625       .framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1626 
1627       /* Vulkan 1.3 */
1628       .minSubgroupSize = pdev->info.gfx_level >= GFX10 ? 32 : 64,
1629       .maxSubgroupSize = 64,
1630       .maxComputeWorkgroupSubgroups = UINT32_MAX,
1631       .requiredSubgroupSizeStages = pdev->info.gfx_level >= GFX10 ? VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages : 0,
1632       .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
1633       .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS,
1634       .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS,
1635       .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT,
1636       .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT,
1637       .maxInlineUniformTotalSize = UINT16_MAX,
1638       .integerDotProduct8BitUnsignedAccelerated = accel_dot,
1639       .integerDotProduct8BitSignedAccelerated = accel_dot,
1640       .integerDotProduct8BitMixedSignednessAccelerated = accel_dot && gfx11plus,
1641       .integerDotProduct4x8BitPackedUnsignedAccelerated = accel_dot,
1642       .integerDotProduct4x8BitPackedSignedAccelerated = accel_dot,
1643       .integerDotProduct4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus,
1644       .integerDotProduct16BitUnsignedAccelerated = accel_dot && !gfx11plus,
1645       .integerDotProduct16BitSignedAccelerated = accel_dot && !gfx11plus,
1646       .integerDotProduct16BitMixedSignednessAccelerated = false,
1647       .integerDotProduct32BitUnsignedAccelerated = false,
1648       .integerDotProduct32BitSignedAccelerated = false,
1649       .integerDotProduct32BitMixedSignednessAccelerated = false,
1650       .integerDotProduct64BitUnsignedAccelerated = false,
1651       .integerDotProduct64BitSignedAccelerated = false,
1652       .integerDotProduct64BitMixedSignednessAccelerated = false,
1653       .integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel_dot,
1654       .integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel_dot,
1655       .integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel_dot && gfx11plus,
1656       .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel_dot,
1657       .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel_dot,
1658       .integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus,
1659       .integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel_dot && !gfx11plus,
1660       .integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel_dot && !gfx11plus,
1661       .integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false,
1662       .integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false,
1663       .integerDotProductAccumulatingSaturating32BitSignedAccelerated = false,
1664       .integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false,
1665       .integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false,
1666       .integerDotProductAccumulatingSaturating64BitSignedAccelerated = false,
1667       .integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false,
1668       .storageTexelBufferOffsetAlignmentBytes = 4,
1669       .storageTexelBufferOffsetSingleTexelAlignment = true,
1670       .uniformTexelBufferOffsetAlignmentBytes = 4,
1671       .uniformTexelBufferOffsetSingleTexelAlignment = true,
1672       .maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1673 
1674       /* Vulkan 1.4 */
1675       .lineSubPixelPrecisionBits = 4,
1676       .maxVertexAttribDivisor = UINT32_MAX,
1677       .supportsNonZeroFirstInstance = true,
1678       .maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
1679       .dynamicRenderingLocalReadDepthStencilAttachments = true,
1680       .dynamicRenderingLocalReadMultisampledAttachments = true,
1681       .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
1682       .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
1683       .depthStencilSwizzleOneSupport = true,
1684       .polygonModePointSize = true,
1685       .nonStrictSinglePixelWideLinesUseParallelogram = true,
1686       .nonStrictWideLinesUseParallelogram = true,
1687       .blockTexelViewCompatibleMultipleLayers = true,
1688       .maxCombinedImageSamplerDescriptorCount = 1,
1689       .fragmentShadingRateClampCombinerInputs = true,
1690       .defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1691       .defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1692       .defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
1693       .defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2,
1694       .copySrcLayoutCount = 0,
1695       .pCopySrcLayouts = NULL,
1696       .copyDstLayoutCount = 0,
1697       .pCopyDstLayouts = NULL,
1698       .identicalMemoryTypeRequirements = false,
1699 
1700       /* VK_EXT_discard_rectangles */
1701       .maxDiscardRectangles = MAX_DISCARD_RECTANGLES,
1702 
1703       /* VK_EXT_external_memory_host */
1704       .minImportedHostPointerAlignment = 4096,
1705 
1706       /* VK_AMD_shader_core_properties */
1707       /* Shader engines. */
1708       .shaderEngineCount = pdev->info.max_se,
1709       .shaderArraysPerEngineCount = pdev->info.max_sa_per_se,
1710       .computeUnitsPerShaderArray = pdev->info.min_good_cu_per_sa,
1711       .simdPerComputeUnit = pdev->info.num_simd_per_compute_unit,
1712       .wavefrontsPerSimd = pdev->info.max_waves_per_simd,
1713       .wavefrontSize = 64,
1714 
1715       /* SGPR. */
1716       .sgprsPerSimd = pdev->info.num_physical_sgprs_per_simd,
1717       .minSgprAllocation = pdev->info.min_sgpr_alloc,
1718       .maxSgprAllocation = pdev->info.max_sgpr_alloc,
1719       .sgprAllocationGranularity = pdev->info.sgpr_alloc_granularity,
1720 
1721       /* VGPR. */
1722       .vgprsPerSimd = pdev->info.num_physical_wave64_vgprs_per_simd,
1723       .minVgprAllocation = pdev->info.min_wave64_vgpr_alloc,
1724       .maxVgprAllocation = pdev->info.max_vgpr_alloc,
1725       .vgprAllocationGranularity = pdev->info.wave64_vgpr_alloc_granularity,
1726 
1727       /* VK_AMD_shader_core_properties2 */
1728       .shaderCoreFeatures = 0,
1729       .activeComputeUnitCount = pdev->info.num_cu,
1730 
1731       /* VK_EXT_conservative_rasterization */
1732       .primitiveOverestimationSize = 0,
1733       .maxExtraPrimitiveOverestimationSize = 0,
1734       .extraPrimitiveOverestimationSizeGranularity = 0,
1735       .primitiveUnderestimation = true,
1736       .conservativePointAndLineRasterization = false,
1737       .degenerateTrianglesRasterized = true,
1738       .degenerateLinesRasterized = false,
1739       .fullyCoveredFragmentShaderInputVariable = true,
1740       .conservativeRasterizationPostDepthCoverage = false,
1741 
1742 #ifndef _WIN32
1743       /* VK_EXT_pci_bus_info */
1744       .pciDomain = pdev->bus_info.domain,
1745       .pciBus = pdev->bus_info.bus,
1746       .pciDevice = pdev->bus_info.dev,
1747       .pciFunction = pdev->bus_info.func,
1748 #endif
1749 
1750       /* VK_EXT_transform_feedback */
1751       .maxTransformFeedbackStreams = MAX_SO_STREAMS,
1752       .maxTransformFeedbackBuffers = MAX_SO_BUFFERS,
1753       .maxTransformFeedbackBufferSize = UINT32_MAX,
1754       .maxTransformFeedbackStreamDataSize = 512,
1755       .maxTransformFeedbackBufferDataSize = 512,
1756       .maxTransformFeedbackBufferDataStride = 512,
1757       .transformFeedbackQueries = true,
1758       .transformFeedbackStreamsLinesTriangles = true,
1759       .transformFeedbackRasterizationStreamSelect = false,
1760       .transformFeedbackDraw = true,
1761 
1762       /* VK_EXT_sample_locations */
1763       .sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
1764       .maxSampleLocationGridSize = (VkExtent2D){2, 2},
1765       .sampleLocationCoordinateRange = {0.0f, 0.9375f},
1766       .sampleLocationSubPixelBits = 4,
1767       .variableSampleLocations = true,
1768 
1769       /* VK_EXT_robustness2 */
1770       .robustStorageBufferAccessSizeAlignment = 4,
1771       .robustUniformBufferAccessSizeAlignment = 4,
1772 
1773       /* VK_EXT_custom_border_color */
1774       .maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT,
1775 
1776       /* VK_KHR_fragment_shading_rate */
1777       .minFragmentShadingRateAttachmentTexelSize = vrs_texel_extent,
1778       .maxFragmentShadingRateAttachmentTexelSize = vrs_texel_extent,
1779       .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1780       .primitiveFragmentShadingRateWithMultipleViewports = true,
1781       .layeredShadingRateAttachments = false, /* TODO */
1782       .fragmentShadingRateNonTrivialCombinerOps = true,
1783       .maxFragmentSize = (VkExtent2D){2, 2},
1784       .maxFragmentSizeAspectRatio = 2,
1785       .maxFragmentShadingRateCoverageSamples = pdev->info.gfx_level >= GFX12 ? 16 : 32,
1786       .maxFragmentShadingRateRasterizationSamples =
1787          pdev->info.gfx_level >= GFX12 ? VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_8_BIT,
1788       .fragmentShadingRateWithShaderDepthStencilWrites = !pdev->info.has_vrs_ds_export_bug,
1789       .fragmentShadingRateWithSampleMask = true,
1790       .fragmentShadingRateWithShaderSampleMask = false,
1791       .fragmentShadingRateWithConservativeRasterization = true,
1792       .fragmentShadingRateWithFragmentShaderInterlock = pdev->info.gfx_level >= GFX11 && radv_has_pops(pdev),
1793       .fragmentShadingRateWithCustomSampleLocations = false,
1794       .fragmentShadingRateStrictMultiplyCombiner = true,
1795 
1796       /* VK_EXT_provoking_vertex */
1797       .provokingVertexModePerPipeline = true,
1798       .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1799 
1800       /* VK_KHR_acceleration_structure */
1801       .maxGeometryCount = (1 << 24) - 1,
1802       .maxInstanceCount = (1 << 24) - 1,
1803       .maxPrimitiveCount = (1 << 29) - 1,
1804       .maxPerStageDescriptorAccelerationStructures = max_descriptor_set_size,
1805       .maxPerStageDescriptorUpdateAfterBindAccelerationStructures = max_descriptor_set_size,
1806       .maxDescriptorSetAccelerationStructures = max_descriptor_set_size,
1807       .maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size,
1808       .minAccelerationStructureScratchOffsetAlignment = 128,
1809 
1810       /* VK_EXT_multi_draw */
1811       .maxMultiDrawCount = 2048,
1812 
1813       /* VK_KHR_ray_tracing_pipeline */
1814       .shaderGroupHandleSize = RADV_RT_HANDLE_SIZE,
1815       .maxRayRecursionDepth = 31,    /* Minimum allowed for DXR. */
1816       .maxShaderGroupStride = 16384, /* dummy */
1817       /* This isn't strictly necessary, but Doom Eternal breaks if the
1818        * alignment is any lower. */
1819       .shaderGroupBaseAlignment = RADV_RT_HANDLE_SIZE,
1820       .shaderGroupHandleCaptureReplaySize = sizeof(struct radv_rt_capture_replay_handle),
1821       .maxRayDispatchInvocationCount = 1024 * 1024 * 64,
1822       .shaderGroupHandleAlignment = 16,
1823       .maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE,
1824 
1825       /* VK_KHR_performance_query */
1826       .allowCommandBufferQueryCopies = false,
1827 
1828       /* VK_EXT_graphics_pipeline_library */
1829       .graphicsPipelineLibraryFastLinking = true,
1830       .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1831 
1832       /* VK_EXT_mesh_shader */
1833       .maxTaskWorkGroupTotalCount = 4194304, /* 2^22 min required */
1834       .maxTaskWorkGroupCount = {65535, 65535, 65535},
1835       .maxTaskWorkGroupInvocations = 1024,
1836       .maxTaskWorkGroupSize = {1024, 1024, 1024},
1837       .maxTaskPayloadSize = 16384, /* 16K min required */
1838       .maxTaskSharedMemorySize = 65536,
1839       .maxTaskPayloadAndSharedMemorySize = 65536,
1840 
1841       .maxMeshWorkGroupTotalCount = 4194304, /* 2^22 min required */
1842       .maxMeshWorkGroupCount = {65535, 65535, 65535},
1843       .maxMeshWorkGroupInvocations = 256, /* Max NGG HW limit */
1844       .maxMeshWorkGroupSize = {256, 256, 256},
1845       .maxMeshOutputMemorySize = 32 * 1024,                   /* 32K min required */
1846       .maxMeshSharedMemorySize = 28672,                       /* 28K min required */
1847       .maxMeshPayloadAndSharedMemorySize = 16384 + 28672,     /* 28K min required */
1848       .maxMeshPayloadAndOutputMemorySize = 16384 + 32 * 1024, /* 47K min required */
1849       .maxMeshOutputComponents = 128,                         /* 32x vec4 min required */
1850       .maxMeshOutputVertices = 256,
1851       .maxMeshOutputPrimitives = 256,
1852       .maxMeshOutputLayers = 8,
1853       .maxMeshMultiviewViewCount = MAX_VIEWS,
1854       .meshOutputPerVertexGranularity = 1,
1855       .meshOutputPerPrimitiveGranularity = 1,
1856 
1857       .maxPreferredTaskWorkGroupInvocations = 64,
1858       .maxPreferredMeshWorkGroupInvocations = 128,
1859       .prefersLocalInvocationVertexOutput = true,
1860       .prefersLocalInvocationPrimitiveOutput = true,
1861       .prefersCompactVertexOutput = true,
1862       .prefersCompactPrimitiveOutput = false,
1863 
1864       /* VK_EXT_extended_dynamic_state3 */
1865       .dynamicPrimitiveTopologyUnrestricted = false,
1866 
1867       /* VK_EXT_descriptor_buffer */
1868       .combinedImageSamplerDescriptorSingleArray = true,
1869       .bufferlessPushDescriptors = true,
1870       .allowSamplerImageViewPostSubmitCreation = false,
1871       .descriptorBufferOffsetAlignment = 4,
1872       .maxDescriptorBufferBindings = MAX_SETS,
1873       .maxResourceDescriptorBufferBindings = MAX_SETS,
1874       .maxSamplerDescriptorBufferBindings = MAX_SETS,
1875       .maxEmbeddedImmutableSamplerBindings = MAX_SETS,
1876       .maxEmbeddedImmutableSamplers = radv_max_descriptor_set_size(),
1877       /* No data required for capture/replay but these values need to be non-zero. */
1878       .bufferCaptureReplayDescriptorDataSize = 1,
1879       .imageCaptureReplayDescriptorDataSize = 1,
1880       .imageViewCaptureReplayDescriptorDataSize = 1,
1881       .samplerCaptureReplayDescriptorDataSize = 1,
1882       .accelerationStructureCaptureReplayDescriptorDataSize = 1,
1883       .samplerDescriptorSize = 16,
1884       .combinedImageSamplerDescriptorSize = 96,
1885       .sampledImageDescriptorSize = 64,
1886       .storageImageDescriptorSize = 32,
1887       .uniformTexelBufferDescriptorSize = 16,
1888       .robustUniformTexelBufferDescriptorSize = 16,
1889       .storageTexelBufferDescriptorSize = 16,
1890       .robustStorageTexelBufferDescriptorSize = 16,
1891       .uniformBufferDescriptorSize = 16,
1892       .robustUniformBufferDescriptorSize = 16,
1893       .storageBufferDescriptorSize = 16,
1894       .robustStorageBufferDescriptorSize = 16,
1895       .inputAttachmentDescriptorSize = 64,
1896       .accelerationStructureDescriptorSize = 16,
1897       .maxSamplerDescriptorBufferRange = UINT32_MAX,
1898       .maxResourceDescriptorBufferRange = UINT32_MAX,
1899       .samplerDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1900       .resourceDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1901       .descriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1902 
1903       /* VK_KHR_fragment_shader_barycentric */
1904       .triStripVertexOrderIndependentOfProvokingVertex = false,
1905 
1906       /* VK_EXT_pipeline_robustness */
1907       .defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1908       .defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1909       .defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
1910       .defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2,
1911 
1912       /* VK_KHR_cooperative_matrix */
1913       .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1914 
1915       /* VK_EXT_map_memory_placed */
1916       .minPlacedMemoryMapAlignment = os_page_size,
1917 
1918       /* VK_EXT_nested_command_buffer */
1919       .maxCommandBufferNestingLevel = UINT32_MAX,
1920 
1921       /* VK_EXT_legacy_vertex_attributes */
1922       .nativeUnalignedPerformance = false,
1923 
1924       /* VK_MESA_image_alignment_control */
1925       .supportedImageAlignmentMask = (4 * 1024) | (64 * 1024) | (gfx11plus ? 256 * 1024 : 0),
1926 
1927       /* VK_KHR_maintenance7 */
1928       .robustFragmentShadingRateAttachmentAccess = true,
1929       .separateDepthStencilAttachmentAccess = true,
1930       .maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1931       .maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1932       .maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS,
1933       .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1934       .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1935       .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS,
1936 
1937       /* VK_KHR_pipeline_binary */
1938       .pipelineBinaryInternalCache = true,
1939       .pipelineBinaryInternalCacheControl = true,
1940       .pipelineBinaryPrefersInternalCache = false,
1941       .pipelineBinaryPrecompiledInternalCache = false,
1942       .pipelineBinaryCompressedData = false,
1943 
1944       /* VK_KHR_compute_shader_derivatives */
1945       .meshAndTaskShaderDerivatives = radv_taskmesh_enabled(pdev),
1946 
1947       /* VK_EXT_device_generated_commands */
1948       .maxIndirectPipelineCount = 4096,
1949       .maxIndirectShaderObjectCount = 4096,
1950       .maxIndirectSequenceCount = 1048576,
1951       .maxIndirectCommandsTokenCount = 128,
1952       .maxIndirectCommandsTokenOffset = 2047,
1953       .maxIndirectCommandsIndirectStride = 2048,
1954       .supportedIndirectCommandsInputModes = VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1955                                              VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1956       .supportedIndirectCommandsShaderStages =
1957          VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages | rt_stages,
1958       .supportedIndirectCommandsShaderStagesPipelineBinding = VK_SHADER_STAGE_COMPUTE_BIT,
1959       .supportedIndirectCommandsShaderStagesShaderBinding = VK_SHADER_STAGE_COMPUTE_BIT,
1960       .deviceGeneratedCommandsTransformFeedback = true,
1961       .deviceGeneratedCommandsMultiDrawIndirectCount = true,
1962    };
1963 
1964    struct vk_properties *p = &pdev->vk.properties;
1965 
1966    strcpy(p->deviceName, pdev->marketing_name);
1967    memcpy(p->pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1968 
1969    memcpy(p->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
1970    memcpy(p->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
1971    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1972 
1973    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1974    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1975             radv_get_compiler_string(pdev));
1976 
1977    memset(p->optimalTilingLayoutUUID, 0, sizeof(p->optimalTilingLayoutUUID));
1978 
1979    /* VK_EXT_physical_device_drm */
1980 #ifndef _WIN32
1981    if (pdev->available_nodes & (1 << DRM_NODE_PRIMARY)) {
1982       p->drmHasPrimary = true;
1983       p->drmPrimaryMajor = (int64_t)major(pdev->primary_devid);
1984       p->drmPrimaryMinor = (int64_t)minor(pdev->primary_devid);
1985    } else {
1986       p->drmHasPrimary = false;
1987    }
1988    if (pdev->available_nodes & (1 << DRM_NODE_RENDER)) {
1989       p->drmHasRender = true;
1990       p->drmRenderMajor = (int64_t)major(pdev->render_devid);
1991       p->drmRenderMinor = (int64_t)minor(pdev->render_devid);
1992    } else {
1993       p->drmHasRender = false;
1994    }
1995 #endif
1996 
1997    /* VK_EXT_shader_module_identifier */
1998    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1999    memcpy(p->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
2000           sizeof(p->shaderModuleIdentifierAlgorithmUUID));
2001 
2002    /* VK_EXT_shader_object */
2003    radv_device_get_cache_uuid(pdev, p->shaderBinaryUUID);
2004    p->shaderBinaryVersion = 1;
2005 }
2006 
2007 static VkResult
radv_physical_device_try_create(struct radv_instance * instance,drmDevicePtr drm_device,struct radv_physical_device ** pdev_out)2008 radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
2009                                 struct radv_physical_device **pdev_out)
2010 {
2011    VkResult result;
2012    int fd = -1;
2013    int master_fd = -1;
2014 
2015 #ifdef _WIN32
2016    assert(drm_device == NULL);
2017 #else
2018    bool is_virtio = false;
2019    if (drm_device) {
2020       const char *path = drm_device->nodes[DRM_NODE_RENDER];
2021       drmVersionPtr version;
2022 
2023       fd = open(path, O_RDWR | O_CLOEXEC);
2024       if (fd < 0) {
2025          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
2026       }
2027 
2028       version = drmGetVersion(fd);
2029       if (!version) {
2030          close(fd);
2031 
2032          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2033                           "Could not get the kernel driver version for device %s: %m", path);
2034       }
2035 
2036       if (!strcmp(version->name, "amdgpu")) {
2037          /* nothing to do. */
2038       } else
2039 #ifdef HAVE_AMDGPU_VIRTIO
2040          if (!strcmp(version->name, "virtio_gpu")) {
2041          is_virtio = true;
2042       } else
2043 #endif
2044       {
2045          drmFreeVersion(version);
2046          close(fd);
2047 
2048          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
2049                           "Device '%s' is not using the AMDGPU kernel driver: %m", path);
2050       }
2051       drmFreeVersion(version);
2052 
2053       if (instance->debug_flags & RADV_DEBUG_STARTUP)
2054          fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
2055    }
2056 #endif
2057 
2058    struct radv_physical_device *pdev =
2059       vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*pdev), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
2060    if (!pdev) {
2061       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2062       goto fail_fd;
2063    }
2064 
2065    struct vk_physical_device_dispatch_table dispatch_table;
2066    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
2067    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);
2068 
2069    result = vk_physical_device_init(&pdev->vk, &instance->vk, NULL, NULL, NULL, &dispatch_table);
2070    if (result != VK_SUCCESS) {
2071       goto fail_alloc;
2072    }
2073 
2074 #ifdef _WIN32
2075    pdev->ws = radv_null_winsys_create();
2076 #else
2077    if (drm_device) {
2078       bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2079 
2080       pdev->ws =
2081          radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio);
2082    } else {
2083       pdev->ws = radv_null_winsys_create();
2084    }
2085 #endif
2086 
2087    if (!pdev->ws) {
2088       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
2089       goto fail_base;
2090    }
2091 
2092    pdev->vk.supported_sync_types = pdev->ws->get_sync_types(pdev->ws);
2093 
2094 #ifndef _WIN32
2095    if (drm_device && instance->vk.enabled_extensions.KHR_display) {
2096       master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
2097       if (master_fd >= 0) {
2098          uint32_t accel_working = 0;
2099          struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
2100                                            .return_size = sizeof(accel_working),
2101                                            .query = AMDGPU_INFO_ACCEL_WORKING};
2102 
2103          if (drm_ioctl_write(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
2104              !accel_working) {
2105             close(master_fd);
2106             master_fd = -1;
2107          }
2108       }
2109    }
2110 #endif
2111 
2112    pdev->master_fd = master_fd;
2113    pdev->local_fd = fd;
2114    pdev->ws->query_info(pdev->ws, &pdev->info);
2115    pdev->info.family_overridden = drm_device == NULL;
2116 
2117    if (drm_device) {
2118       pdev->addrlib = ac_addrlib_create(&pdev->info, &pdev->info.max_alignment);
2119       if (!pdev->addrlib) {
2120          result = VK_ERROR_INITIALIZATION_FAILED;
2121          goto fail_wsi;
2122       }
2123    }
2124 
2125    pdev->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
2126 #if !AMD_LLVM_AVAILABLE
2127    if (pdev->use_llvm) {
2128       fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
2129                       "enabled at build time.\n");
2130       abort();
2131    }
2132 #endif
2133 
2134 #if DETECT_OS_ANDROID
2135    pdev->emulate_etc2 = !pdev->info.has_etc_support;
2136    pdev->emulate_astc = true;
2137 #else
2138    pdev->emulate_etc2 = !pdev->info.has_etc_support && instance->drirc.vk_require_etc2;
2139    pdev->emulate_astc = instance->drirc.vk_require_astc;
2140 #endif
2141 
2142    snprintf(pdev->name, sizeof(pdev->name), "AMD RADV %s%s", pdev->info.name, radv_get_compiler_string(pdev));
2143 
2144    const char *marketing_name = pdev->ws->get_chip_name(pdev->ws);
2145    snprintf(pdev->marketing_name, sizeof(pdev->name), "%s (RADV %s%s)", marketing_name ? marketing_name : "AMD Unknown",
2146             pdev->info.name, radv_get_compiler_string(pdev));
2147 
2148    if (pdev->info.gfx_level >= GFX12)
2149       vk_warn_non_conformant_implementation("radv");
2150 
2151    radv_get_driver_uuid(&pdev->driver_uuid);
2152    radv_get_device_uuid(&pdev->info, &pdev->device_uuid);
2153 
2154    pdev->dcc_msaa_allowed = (instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
2155 
2156    pdev->use_fmask = pdev->info.gfx_level < GFX11 && !(instance->debug_flags & RADV_DEBUG_NO_FMASK);
2157 
2158    pdev->use_ngg = (pdev->info.gfx_level >= GFX10 && pdev->info.family != CHIP_NAVI14 &&
2159                     !(instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
2160                    pdev->info.gfx_level >= GFX11;
2161 
2162    /* TODO: Investigate if NGG culling helps on GFX11. */
2163    pdev->use_ngg_culling = pdev->use_ngg && pdev->info.max_render_backends > 1 &&
2164                            (pdev->info.gfx_level == GFX10_3 || pdev->info.gfx_level == GFX10 ||
2165                             (instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
2166                            !(instance->debug_flags & RADV_DEBUG_NO_NGGC);
2167 
2168    pdev->use_ngg_streamout = pdev->info.gfx_level >= GFX11;
2169 
2170    pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;
2171 
2172    pdev->mesh_fast_launch_2 = pdev->info.gfx_level >= GFX11;
2173 
2174    pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;
2175 
2176    /* Determine the number of threads per wave for all stages. */
2177    pdev->cs_wave_size = 64;
2178    pdev->ps_wave_size = 64;
2179    pdev->ge_wave_size = 64;
2180    pdev->rt_wave_size = 64;
2181 
2182    if (pdev->info.gfx_level >= GFX10) {
2183       if (instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
2184          pdev->cs_wave_size = 32;
2185 
2186       /* For pixel shaders, wave64 is recommended. */
2187       if (instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
2188          pdev->ps_wave_size = 32;
2189 
2190       if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
2191          pdev->ge_wave_size = 32;
2192 
2193       /* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence.
2194        * However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
2195        * wave32 VOPD for VALU dependent code.
2196        * (as well as the SALU count becoming more problematic with wave32)
2197        */
2198       if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11)
2199          pdev->rt_wave_size = 32;
2200 
2201       if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || instance->drirc.force_rt_wave64)
2202          pdev->rt_wave_size = 64;
2203    }
2204 
2205    radv_probe_video_decode(pdev);
2206    radv_probe_video_encode(pdev);
2207 
2208    pdev->max_shared_size = pdev->info.gfx_level >= GFX7 ? 65536 : 32768;
2209 
2210    radv_physical_device_init_mem_types(pdev);
2211 
2212    radv_physical_device_get_supported_extensions(pdev, &pdev->vk.supported_extensions);
2213    radv_physical_device_get_features(pdev, &pdev->vk.supported_features);
2214 
2215    radv_get_nir_options(pdev);
2216 
2217 #ifndef _WIN32
2218    if (drm_device) {
2219       struct stat primary_stat = {0}, render_stat = {0};
2220 
2221       pdev->available_nodes = drm_device->available_nodes;
2222       pdev->bus_info = *drm_device->businfo.pci;
2223 
2224       if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
2225           stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
2226          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
2227                             drm_device->nodes[DRM_NODE_PRIMARY]);
2228          goto fail_perfcounters;
2229       }
2230       pdev->primary_devid = primary_stat.st_rdev;
2231 
2232       if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
2233           stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
2234          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
2235                             drm_device->nodes[DRM_NODE_RENDER]);
2236          goto fail_perfcounters;
2237       }
2238       pdev->render_devid = render_stat.st_rdev;
2239    }
2240 #endif
2241 
2242    radv_physical_device_init_cache_key(pdev);
2243 
2244    if (radv_device_get_cache_uuid(pdev, pdev->cache_uuid)) {
2245       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
2246       goto fail_wsi;
2247    }
2248 
2249    /* The gpu id is already embedded in the uuid so we just pass "radv"
2250     * when creating the cache.
2251     */
2252    char buf[VK_UUID_SIZE * 2 + 1];
2253    mesa_bytes_to_hex(buf, pdev->cache_uuid, VK_UUID_SIZE);
2254    pdev->vk.disk_cache = disk_cache_create(pdev->name, buf, 0);
2255 
2256    pdev->disk_cache_meta = disk_cache_create_custom(pdev->name, buf, 0, "radv_builtin_shaders", 1024 * 32 /* 32MiB */);
2257 
2258    radv_get_physical_device_properties(pdev);
2259 
2260    if ((instance->debug_flags & RADV_DEBUG_INFO))
2261       ac_print_gpu_info(&pdev->info, stdout);
2262 
2263    radv_init_physical_device_decoder(pdev);
2264    radv_init_physical_device_encoder(pdev);
2265 
2266    radv_physical_device_init_queue_table(pdev);
2267 
2268    /* We don't check the error code, but later check if it is initialized. */
2269    ac_init_perfcounters(&pdev->info, false, false, &pdev->ac_perfcounters);
2270 
2271    /* The WSI is structured as a layer on top of the driver, so this has
2272     * to be the last part of initialization (at least until we get other
2273     * semi-layers).
2274     */
2275    result = radv_init_wsi(pdev);
2276    if (result != VK_SUCCESS) {
2277       vk_error(instance, result);
2278       goto fail_perfcounters;
2279    }
2280 
2281    pdev->gs_table_depth = ac_get_gs_table_depth(pdev->info.gfx_level, pdev->info.family);
2282 
2283    ac_get_hs_info(&pdev->info, &pdev->hs);
2284    ac_get_task_info(&pdev->info, &pdev->task_info);
2285    radv_get_binning_settings(pdev, &pdev->binning_settings);
2286 
2287    if (pdev->info.has_distributed_tess) {
2288       if (pdev->info.family == CHIP_FIJI || pdev->info.family >= CHIP_POLARIS10)
2289          pdev->tess_distribution_mode = V_028B6C_TRAPEZOIDS;
2290       else
2291          pdev->tess_distribution_mode = V_028B6C_DONUTS;
2292    } else {
2293       pdev->tess_distribution_mode = V_028B6C_NO_DIST;
2294    }
2295 
2296    *pdev_out = pdev;
2297 
2298    return VK_SUCCESS;
2299 
2300 fail_perfcounters:
2301    ac_destroy_perfcounters(&pdev->ac_perfcounters);
2302    disk_cache_destroy(pdev->vk.disk_cache);
2303    disk_cache_destroy(pdev->disk_cache_meta);
2304 fail_wsi:
2305    if (pdev->addrlib)
2306       ac_addrlib_destroy(pdev->addrlib);
2307    pdev->ws->destroy(pdev->ws);
2308 fail_base:
2309    vk_physical_device_finish(&pdev->vk);
2310 fail_alloc:
2311    vk_free(&instance->vk.alloc, pdev);
2312 fail_fd:
2313    if (fd != -1)
2314       close(fd);
2315    if (master_fd != -1)
2316       close(master_fd);
2317    return result;
2318 }
2319 
2320 VkResult
create_null_physical_device(struct vk_instance * vk_instance)2321 create_null_physical_device(struct vk_instance *vk_instance)
2322 {
2323    struct radv_instance *instance = container_of(vk_instance, struct radv_instance, vk);
2324    struct radv_physical_device *pdev;
2325 
2326    VkResult result = radv_physical_device_try_create(instance, NULL, &pdev);
2327    if (result != VK_SUCCESS)
2328       return result;
2329 
2330    list_addtail(&pdev->vk.link, &instance->vk.physical_devices.list);
2331    return VK_SUCCESS;
2332 }
2333 
2334 VkResult
create_drm_physical_device(struct vk_instance * vk_instance,struct _drmDevice * device,struct vk_physical_device ** out)2335 create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
2336 {
2337 #ifndef _WIN32
2338    bool supported_device = false;
2339 
2340    if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI)
2341       return VK_ERROR_INCOMPATIBLE_DRIVER;
2342 
2343 #ifdef HAVE_AMDGPU_VIRTIO
2344    supported_device |= device->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID;
2345 #endif
2346 
2347    supported_device |= device->deviceinfo.pci->vendor_id == ATI_VENDOR_ID;
2348 
2349    if (!supported_device)
2350       return VK_ERROR_INCOMPATIBLE_DRIVER;
2351 
2352    return radv_physical_device_try_create((struct radv_instance *)vk_instance, device,
2353                                           (struct radv_physical_device **)out);
2354 #else
2355    return VK_SUCCESS;
2356 #endif
2357 }
2358 
2359 void
radv_physical_device_destroy(struct vk_physical_device * vk_device)2360 radv_physical_device_destroy(struct vk_physical_device *vk_device)
2361 {
2362    struct radv_physical_device *pdev = container_of(vk_device, struct radv_physical_device, vk);
2363    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2364 
2365    radv_finish_wsi(pdev);
2366    ac_destroy_perfcounters(&pdev->ac_perfcounters);
2367    if (pdev->addrlib)
2368       ac_addrlib_destroy(pdev->addrlib);
2369    pdev->ws->destroy(pdev->ws);
2370    disk_cache_destroy(pdev->vk.disk_cache);
2371    disk_cache_destroy(pdev->disk_cache_meta);
2372    if (pdev->local_fd != -1)
2373       close(pdev->local_fd);
2374    if (pdev->master_fd != -1)
2375       close(pdev->master_fd);
2376    vk_physical_device_finish(&pdev->vk);
2377    vk_free(&instance->vk.alloc, pdev);
2378 }
2379 
2380 static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device * pdev,uint32_t * pCount,VkQueueFamilyProperties ** pQueueFamilyProperties)2381 radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdev, uint32_t *pCount,
2382                                                  VkQueueFamilyProperties **pQueueFamilyProperties)
2383 {
2384    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2385    int num_queue_families = 1;
2386    int idx;
2387    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2388       num_queue_families++;
2389 
2390    if (pdev->video_decode_enabled) {
2391       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0)
2392          num_queue_families++;
2393    }
2394 
2395    if (radv_transfer_queue_enabled(pdev)) {
2396       num_queue_families++;
2397    }
2398 
2399    if (pdev->video_encode_enabled) {
2400      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0)
2401        num_queue_families++;
2402    }
2403 
2404    if (radv_sparse_queue_enabled(pdev)) {
2405       num_queue_families++;
2406    }
2407 
2408    if (pQueueFamilyProperties == NULL) {
2409       *pCount = num_queue_families;
2410       return;
2411    }
2412 
2413    if (!*pCount)
2414       return;
2415 
2416    idx = 0;
2417    if (*pCount >= 1) {
2418       VkQueueFlags gfx_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
2419       if (!radv_sparse_queue_enabled(pdev))
2420          gfx_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
2421       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2422          .queueFlags = gfx_flags,
2423          .queueCount = 1,
2424          .timestampValidBits = 64,
2425          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2426       };
2427       idx++;
2428    }
2429 
2430    if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2431       VkQueueFlags compute_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
2432       if (!radv_sparse_queue_enabled(pdev))
2433          compute_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
2434       if (*pCount > idx) {
2435          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2436             .queueFlags = compute_flags,
2437             .queueCount = pdev->info.ip[AMD_IP_COMPUTE].num_queues,
2438             .timestampValidBits = 64,
2439             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2440          };
2441          idx++;
2442       }
2443    }
2444 
2445    if (pdev->video_decode_enabled) {
2446       if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
2447          if (*pCount > idx) {
2448             *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2449                .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
2450                .queueCount = pdev->info.ip[pdev->vid_decode_ip].num_queues,
2451                .timestampValidBits = 0,
2452                .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2453             };
2454             idx++;
2455          }
2456       }
2457    }
2458 
2459    if (radv_transfer_queue_enabled(pdev)) {
2460       if (*pCount > idx) {
2461          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2462             .queueFlags = VK_QUEUE_TRANSFER_BIT,
2463             .queueCount = pdev->info.ip[AMD_IP_SDMA].num_queues,
2464             .timestampValidBits = 64,
2465             .minImageTransferGranularity = (VkExtent3D){16, 16, 8},
2466          };
2467          idx++;
2468       }
2469    }
2470 
2471    if (pdev->video_encode_enabled) {
2472       if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
2473          if (*pCount > idx) {
2474             *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2475                .queueFlags = VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
2476                .queueCount = pdev->info.ip[AMD_IP_VCN_ENC].num_queues,
2477                .timestampValidBits = 0,
2478                .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2479             };
2480             idx++;
2481          }
2482       }
2483    }
2484 
2485    if (radv_sparse_queue_enabled(pdev)) {
2486       if (*pCount > idx) {
2487          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2488             .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
2489             .queueCount = 1,
2490             .timestampValidBits = 64,
2491             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2492          };
2493          idx++;
2494       }
2495    }
2496 
2497    *pCount = idx;
2498 }
2499 
2500 static const VkQueueGlobalPriority radv_global_queue_priorities[] = {
2501    VK_QUEUE_GLOBAL_PRIORITY_LOW,
2502    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM,
2503    VK_QUEUE_GLOBAL_PRIORITY_HIGH,
2504    VK_QUEUE_GLOBAL_PRIORITY_REALTIME,
2505 };
2506 
2507 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)2508 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2509                                              VkQueueFamilyProperties2 *pQueueFamilyProperties)
2510 {
2511    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2512    if (!pQueueFamilyProperties) {
2513       radv_get_physical_device_queue_family_properties(pdev, pCount, NULL);
2514       return;
2515    }
2516    VkQueueFamilyProperties *properties[] = {
2517       &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
2518       &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
2519       &pQueueFamilyProperties[4].queueFamilyProperties, &pQueueFamilyProperties[5].queueFamilyProperties,
2520    };
2521    radv_get_physical_device_queue_family_properties(pdev, pCount, properties);
2522    assert(*pCount <= 6);
2523 
2524    for (uint32_t i = 0; i < *pCount; i++) {
2525       vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
2526          switch (ext->sType) {
2527          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
2528             VkQueueFamilyGlobalPriorityProperties *prop = (VkQueueFamilyGlobalPriorityProperties *)ext;
2529             STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE);
2530             prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2531             memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2532             break;
2533          }
2534          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
2535             VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
2536             prop->queryResultStatusSupport = VK_FALSE;
2537             break;
2538          }
2539          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2540             VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
2541             prop->videoCodecOperations = 0;
2542             if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
2543                if (VIDEO_CODEC_H264DEC)
2544                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
2545                if (VIDEO_CODEC_H265DEC)
2546                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
2547                if (VIDEO_CODEC_AV1DEC && pdev->info.vcn_ip_version >= VCN_3_0_0 &&
2548                    pdev->info.vcn_ip_version != VCN_3_0_33)
2549                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
2550             }
2551             if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
2552                if (VIDEO_CODEC_H264ENC)
2553                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR;
2554                if (VIDEO_CODEC_H265ENC)
2555                   prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
2556             }
2557             break;
2558          }
2559          default:
2560             break;
2561          }
2562       }
2563    }
2564 }
2565 
2566 static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryBudgetPropertiesEXT * memoryBudget)2567 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2568                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2569 {
2570    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2571    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2572    VkPhysicalDeviceMemoryProperties *memory_properties = &pdev->memory_properties;
2573 
2574    /* For all memory heaps, the computation of budget is as follow:
2575     *	heap_budget = heap_size - global_heap_usage + app_heap_usage
2576     *
2577     * The Vulkan spec 1.1.97 says that the budget should include any
2578     * currently allocated device memory.
2579     *
2580     * Note that the application heap usages are not really accurate (eg.
2581     * in presence of shared buffers).
2582     */
2583    if (!pdev->info.has_dedicated_vram) {
2584       if (instance->drirc.enable_unified_heap_on_apu) {
2585          /* When the heaps are unified, only the visible VRAM heap is exposed on APUs. */
2586          assert(pdev->heaps == RADV_HEAP_VRAM_VIS);
2587          assert(pdev->memory_properties.memoryHeaps[0].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2588          const uint8_t vram_vis_heap_idx = 0;
2589 
2590          /* Get the total heap size which is the visible VRAM heap size. */
2591          uint64_t total_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2592 
2593          /* Get the different memory usages. */
2594          uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
2595                                             pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2596          uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2597          uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2598          uint64_t total_system_usage =
2599             pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2600          uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2601 
2602          /* Compute the total free space that can be allocated for this process across all heaps. */
2603          uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2604 
2605          memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
2606          memoryBudget->heapUsage[vram_vis_heap_idx] = total_internal_usage;
2607       } else {
2608          /* On APUs, the driver exposes fake heaps to the application because usually the carveout
2609           * is too small for games but the budgets need to be redistributed accordingly.
2610           */
2611          assert(pdev->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2612          assert(pdev->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2613          assert(pdev->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2614          const uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2615 
2616          /* Get the visible VRAM/GTT heap sizes and internal usages. */
2617          uint64_t gtt_heap_size = pdev->memory_properties.memoryHeaps[gtt_heap_idx].size;
2618          uint64_t vram_vis_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2619 
2620          uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
2621                                             pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2622          uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2623 
2624          /* Compute the total heap size, internal and system usage. */
2625          uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2626          uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2627          uint64_t total_system_usage =
2628             pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2629 
2630          uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2631 
2632          /* Compute the total free space that can be allocated for this process across all heaps. */
2633          uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2634 
2635          /* Compute the remaining visible VRAM size for this process. */
2636          uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2637 
2638          /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap
2639           * sizes, and align down to the page size to be conservative.
2640           */
2641          vram_vis_free_space =
2642             ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), pdev->info.gart_page_size);
2643          uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
2644 
2645          memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2646          memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2647          memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2648          memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2649       }
2650    } else {
2651       unsigned mask = pdev->heaps;
2652       unsigned heap = 0;
2653       while (mask) {
2654          uint64_t internal_usage = 0, system_usage = 0;
2655          unsigned type = 1u << u_bit_scan(&mask);
2656 
2657          switch (type) {
2658          case RADV_HEAP_VRAM:
2659             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2660             system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_USAGE);
2661             break;
2662          case RADV_HEAP_VRAM_VIS:
2663             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS);
2664             if (!(pdev->heaps & RADV_HEAP_VRAM))
2665                internal_usage += pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
2666             system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE);
2667             break;
2668          case RADV_HEAP_GTT:
2669             internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
2670             system_usage = pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
2671             break;
2672          }
2673 
2674          uint64_t total_usage = MAX2(internal_usage, system_usage);
2675 
2676          uint64_t free_space = pdev->memory_properties.memoryHeaps[heap].size -
2677                                MIN2(pdev->memory_properties.memoryHeaps[heap].size, total_usage);
2678          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2679          memoryBudget->heapUsage[heap] = internal_usage;
2680          ++heap;
2681       }
2682 
2683       assert(heap == memory_properties->memoryHeapCount);
2684    }
2685 
2686    /* The heapBudget value must be less than or equal to VkMemoryHeap::size for each heap. */
2687    for (uint32_t i = 0; i < memory_properties->memoryHeapCount; i++) {
2688       memoryBudget->heapBudget[i] = MIN2(memory_properties->memoryHeaps[i].size, memoryBudget->heapBudget[i]);
2689    }
2690 
2691    /* The heapBudget and heapUsage values must be zero for array elements
2692     * greater than or equal to
2693     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2694     */
2695    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2696       memoryBudget->heapBudget[i] = 0;
2697       memoryBudget->heapUsage[i] = 0;
2698    }
2699 }
2700 
2701 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)2702 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2703                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2704 {
2705    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2706 
2707    pMemoryProperties->memoryProperties = pdev->memory_properties;
2708 
2709    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2710       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2711    if (memory_budget)
2712       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2713 }
2714 
2715 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2716 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
2717                                                VkMultisamplePropertiesEXT *pMultisampleProperties)
2718 {
2719    VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2720 
2721    if (samples & supported_samples) {
2722       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2723    } else {
2724       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
2725    }
2726 }
2727 
2728 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)2729 radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
2730                                               VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
2731 {
2732    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2733    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
2734                           pFragmentShadingRateCount);
2735 
2736 #define append_rate(w, h, s)                                                                                           \
2737    {                                                                                                                   \
2738       VkPhysicalDeviceFragmentShadingRateKHR rate = {                                                                  \
2739          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,                              \
2740          .sampleCounts = s,                                                                                            \
2741          .fragmentSize = {.width = w, .height = h},                                                                    \
2742       };                                                                                                               \
2743       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;                             \
2744    }
2745 
2746    for (uint32_t x = 2; x >= 1; x--) {
2747       for (uint32_t y = 2; y >= 1; y--) {
2748          VkSampleCountFlagBits samples;
2749 
2750          if (x == 1 && y == 1) {
2751             samples = ~0;
2752          } else {
2753             samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
2754 
2755             /* VRS coarse shading with 8x MSAA isn't supported on GFX12 and the
2756              * hw automatically clamps to 1x1.
2757              */
2758             if (pdev->info.gfx_level < GFX12)
2759                samples |= VK_SAMPLE_COUNT_8_BIT;
2760          }
2761 
2762          append_rate(x, y, samples);
2763       }
2764    }
2765 #undef append_rate
2766 
2767    return vk_outarray_status(&out);
2768 }
2769 
2770 /* VK_EXT_tooling_info */
2771 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice,uint32_t * pToolCount,VkPhysicalDeviceToolProperties * pToolProperties)2772 radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, uint32_t *pToolCount,
2773                                      VkPhysicalDeviceToolProperties *pToolProperties)
2774 {
2775    VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2776    const struct radv_instance *instance = radv_physical_device_instance(pdev);
2777    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, pToolCount);
2778    bool rgp_enabled, rmv_enabled, rra_enabled;
2779    uint32_t tool_count = 0;
2780 
2781    /* RGP */
2782    rgp_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2783    if (rgp_enabled)
2784       tool_count++;
2785 
2786    /* RMV */
2787    rmv_enabled = instance->vk.trace_mode & VK_TRACE_MODE_RMV;
2788    if (rmv_enabled)
2789       tool_count++;
2790 
2791    /* RRA */
2792    rra_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RRA;
2793    if (rra_enabled)
2794       tool_count++;
2795 
2796    if (!pToolProperties) {
2797       *pToolCount = tool_count;
2798       return VK_SUCCESS;
2799    }
2800 
2801    if (rgp_enabled) {
2802       VkPhysicalDeviceToolProperties tool = {
2803          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2804          .name = "Radeon GPU Profiler",
2805          .version = "1.15",
2806          .description = "A ground-breaking low-level optimization tool that provides detailed "
2807                         "information on Radeon GPUs.",
2808          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT |
2809                      /* VK_EXT_debug_marker is only exposed if SQTT is enabled. */
2810                      VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT | VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT,
2811       };
2812       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2813    }
2814 
2815    if (rmv_enabled) {
2816       VkPhysicalDeviceToolProperties tool = {
2817          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2818          .name = "Radeon Memory Visualizer",
2819          .version = "1.6",
2820          .description = "A tool to allow you to gain a deep understanding of how your application "
2821                         "uses memory for graphics resources.",
2822          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2823       };
2824       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2825    }
2826 
2827    if (rra_enabled) {
2828       VkPhysicalDeviceToolProperties tool = {
2829          .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2830          .name = "Radeon Raytracing Analyzer",
2831          .version = "1.2",
2832          .description = "A tool to investigate the performance of your ray tracing applications and "
2833                         "highlight potential bottlenecks.",
2834          .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2835       };
2836       vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2837    }
2838 
2839    return vk_outarray_status(&out);
2840 }
2841 
2842 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)2843 radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
2844                                                      VkCooperativeMatrixPropertiesKHR *pProperties)
2845 {
2846    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2847 
2848    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2849    {
2850       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2851                                                      .MSize = 16,
2852                                                      .NSize = 16,
2853                                                      .KSize = 16,
2854                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2855                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2856                                                      .CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2857                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2858                                                      .saturatingAccumulation = false,
2859                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2860    }
2861 
2862    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2863    {
2864       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2865                                                      .MSize = 16,
2866                                                      .NSize = 16,
2867                                                      .KSize = 16,
2868                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2869                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2870                                                      .CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2871                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2872                                                      .saturatingAccumulation = false,
2873                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2874    }
2875 
2876    for (unsigned asigned = 0; asigned < 2; asigned++) {
2877       for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
2878          for (unsigned csigned = 0; csigned < 2; csigned++) {
2879             for (unsigned saturate = 0; saturate < 2; saturate++) {
2880                if (!csigned && saturate)
2881                   continue; /* The HW only supports signed acc. */
2882                vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2883                {
2884                   *p = (struct VkCooperativeMatrixPropertiesKHR){
2885                      .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2886                      .MSize = 16,
2887                      .NSize = 16,
2888                      .KSize = 16,
2889                      .AType = asigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2890                      .BType = bsigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2891                      .CType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2892                      .ResultType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2893                      .saturatingAccumulation = saturate,
2894                      .scope = VK_SCOPE_SUBGROUP_KHR};
2895                }
2896             }
2897          }
2898       }
2899    }
2900 
2901    return vk_outarray_status(&out);
2902 }
2903