• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * based in part on anv driver which is:
6  * Copyright © 2015 Intel Corporation
7  *
8  * Permission is hereby granted, free of charge, to any person obtaining a
9  * copy of this software and associated documentation files (the "Software"),
10  * to deal in the Software without restriction, including without limitation
11  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12  * and/or sell copies of the Software, and to permit persons to whom the
13  * Software is furnished to do so, subject to the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the next
16  * paragraph) shall be included in all copies or substantial portions of the
17  * Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25  * IN THE SOFTWARE.
26  */
27 
28 #include <fcntl.h>
29 
30 #ifdef MAJOR_IN_SYSMACROS
31 #include <sys/sysmacros.h>
32 #endif
33 
34 #include "util/disk_cache.h"
35 #include "util/hex.h"
36 #include "util/u_debug.h"
37 #include "radv_debug.h"
38 #include "radv_private.h"
39 
40 #ifdef _WIN32
41 typedef void *drmDevicePtr;
42 #include <io.h>
43 #else
44 #include <amdgpu.h>
45 #include <xf86drm.h>
46 #include "drm-uapi/amdgpu_drm.h"
47 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
48 #endif
49 #include "winsys/null/radv_null_winsys_public.h"
50 #include "git_sha1.h"
51 
52 #if LLVM_AVAILABLE
53 #include "ac_llvm_util.h"
54 #endif
55 
56 static bool
radv_perf_query_supported(const struct radv_physical_device * pdev)57 radv_perf_query_supported(const struct radv_physical_device *pdev)
58 {
59    /* SQTT / SPM interfere with the register states for perf counters, and
60     * the code has only been tested on GFX10.3 */
61    return pdev->rad_info.gfx_level == GFX10_3 && !(pdev->instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
62 }
63 
64 static bool
radv_taskmesh_enabled(const struct radv_physical_device * pdevice)65 radv_taskmesh_enabled(const struct radv_physical_device *pdevice)
66 {
67    if (pdevice->instance->debug_flags & RADV_DEBUG_NO_MESH_SHADER)
68       return false;
69 
70    return pdevice->use_ngg && !pdevice->use_llvm && pdevice->rad_info.gfx_level >= GFX10_3 &&
71           !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdevice->rad_info.has_gang_submit;
72 }
73 
74 static bool
radv_transfer_queue_enabled(const struct radv_physical_device * pdevice)75 radv_transfer_queue_enabled(const struct radv_physical_device *pdevice)
76 {
77    /* Check if the GPU has SDMA support and transfer queues are allowed. */
78    if (pdevice->rad_info.sdma_ip_version == SDMA_UNKNOWN || !pdevice->rad_info.ip[AMD_IP_SDMA].num_queues ||
79        !(pdevice->instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE))
80       return false;
81 
82    return pdevice->rad_info.gfx_level >= GFX9;
83 }
84 
85 static bool
radv_vrs_attachment_enabled(const struct radv_physical_device * pdevice)86 radv_vrs_attachment_enabled(const struct radv_physical_device *pdevice)
87 {
88    return pdevice->rad_info.gfx_level >= GFX11 || !(pdevice->instance->debug_flags & RADV_DEBUG_NO_HIZ);
89 }
90 
91 static bool
radv_calibrated_timestamps_enabled(const struct radv_physical_device * pdevice)92 radv_calibrated_timestamps_enabled(const struct radv_physical_device *pdevice)
93 {
94    return RADV_SUPPORT_CALIBRATED_TIMESTAMPS &&
95           !(pdevice->rad_info.family == CHIP_RAVEN || pdevice->rad_info.family == CHIP_RAVEN2);
96 }
97 
98 static bool
radv_shader_object_enabled(const struct radv_physical_device * pdevice)99 radv_shader_object_enabled(const struct radv_physical_device *pdevice)
100 {
101    /* FIXME: Fix GPU hangs on Renoir. */
102    return (pdevice->rad_info.gfx_level < GFX9 || pdevice->rad_info.family == CHIP_VEGA10) && !pdevice->use_llvm &&
103           pdevice->instance->perftest_flags & RADV_PERFTEST_SHADER_OBJECT;
104 }
105 
106 bool
radv_enable_rt(const struct radv_physical_device * pdevice,bool rt_pipelines)107 radv_enable_rt(const struct radv_physical_device *pdevice, bool rt_pipelines)
108 {
109    if (pdevice->rad_info.gfx_level < GFX10_3 && !radv_emulate_rt(pdevice))
110       return false;
111 
112    if (rt_pipelines && pdevice->use_llvm)
113       return false;
114 
115    return true;
116 }
117 
118 bool
radv_emulate_rt(const struct radv_physical_device * pdevice)119 radv_emulate_rt(const struct radv_physical_device *pdevice)
120 {
121    return pdevice->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
122 }
123 
/* Whether this device is treated as Vulkan-conformant (GFX8 and newer). */
static bool
radv_is_conformant(const struct radv_physical_device *pdevice)
{
   return pdevice->rad_info.gfx_level >= GFX8;
}
129 
/* Decode a 2*length-character hexadecimal string "in" into "length" raw
 * bytes written to "out". Accepts lowercase and uppercase hex digits.
 * The input is assumed to be valid hex of the expected length — no
 * validation is performed (it is only used on build-time constants).
 */
static void
parse_hex(char *out, const char *in, unsigned length)
{
   memset(out, 0, length);

   for (unsigned i = 0; i < 2 * length; ++i) {
      /* Map '0'-'9', 'a'-'f', 'A'-'F' to their 4-bit values. */
      unsigned v = in[i] <= '9' ? in[i] - '0' : (in[i] >= 'a' ? (in[i] - 'a' + 10) : (in[i] - 'A' + 10));
      /* Even positions fill the high nibble, odd positions the low nibble. */
      out[i / 2] |= v << (4 * (1 - i % 2));
   }
}
141 
/* Fill the per-physical-device shader cache key with every driver setting
 * that can affect generated shader code, so cached binaries are invalidated
 * whenever one of these knobs changes (the key is hashed into the cache
 * UUID by radv_device_get_cache_uuid). */
static void
radv_physical_device_init_cache_key(struct radv_physical_device *pdevice)
{
   struct radv_physical_device_cache_key *key = &pdevice->cache_key;

   /* Hardware/build identity. */
   key->family = pdevice->rad_info.family;
   key->ptr_size = sizeof(void *);
   key->conformant_trunc_coord = pdevice->rad_info.conformant_trunc_coord;

   /* Debug flags, perftest flags and drirc workarounds that alter codegen. */
   key->clear_lds = pdevice->instance->drirc.clear_lds;
   key->cs_wave32 = pdevice->cs_wave_size == 32;
   key->disable_aniso_single_level =
      pdevice->instance->drirc.disable_aniso_single_level && pdevice->rad_info.gfx_level < GFX8;
   key->disable_shrink_image_store = pdevice->instance->drirc.disable_shrink_image_store;
   key->disable_sinking_load_input_fs = pdevice->instance->drirc.disable_sinking_load_input_fs;
   key->dual_color_blend_by_location = pdevice->instance->drirc.dual_color_blend_by_location;
   key->emulate_rt = !!(pdevice->instance->perftest_flags & RADV_PERFTEST_EMULATE_RT);
   key->ge_wave32 = pdevice->ge_wave_size == 32;
   key->invariant_geom = !!(pdevice->instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
   key->lower_discard_to_demote = !!(pdevice->instance->debug_flags & RADV_DEBUG_DISCARD_TO_DEMOTE);
   key->mesh_fast_launch_2 = pdevice->mesh_fast_launch_2;
   key->no_fmask = !!(pdevice->instance->debug_flags & RADV_DEBUG_NO_FMASK);
   key->no_rt = !!(pdevice->instance->debug_flags & RADV_DEBUG_NO_RT);
   key->ps_wave32 = pdevice->ps_wave_size == 32;
   key->rt_wave64 = pdevice->rt_wave_size == 64;
   key->split_fma = !!(pdevice->instance->debug_flags & RADV_DEBUG_SPLIT_FMA);
   key->ssbo_non_uniform = pdevice->instance->drirc.ssbo_non_uniform;
   key->tex_non_uniform = pdevice->instance->drirc.tex_non_uniform;

   /* Compiler backend selection. */
   key->use_llvm = pdevice->use_llvm;
   key->use_ngg = pdevice->use_ngg;
   key->use_ngg_culling = pdevice->use_ngg_culling;
}
174 
/* Compute the VK_UUID_SIZE-byte pipeline cache UUID: a SHA-1 over the driver
 * build identifier (or RADV_BUILD_ID_OVERRIDE when defined), the LLVM build
 * when the LLVM backend is in use, and the physical-device cache key.
 * Returns 0 on success, -1 if a build identifier could not be obtained. */
static int
radv_device_get_cache_uuid(struct radv_physical_device *pdevice, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

#ifdef RADV_BUILD_ID_OVERRIDE
   {
      /* Hash the user-provided hex build id instead of the binary's own identifier. */
      unsigned size = strlen(RADV_BUILD_ID_OVERRIDE) / 2;
      char *data = alloca(size);
      parse_hex(data, RADV_BUILD_ID_OVERRIDE, size);
      _mesa_sha1_update(&ctx, data, size);
   }
#else
   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
      return -1;
#endif

#if LLVM_AVAILABLE
   /* LLVM-generated code also depends on the LLVM build itself. */
   if (pdevice->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;
#endif

   _mesa_sha1_update(&ctx, &pdevice->cache_key, sizeof(pdevice->cache_key));
   _mesa_sha1_final(&ctx, sha1);

   /* Truncate the 20-byte SHA-1 digest to VK_UUID_SIZE bytes. */
   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}
207 
/* Fill "uuid" (VK_UUID_SIZE bytes) with the driver UUID via the common AMD helper. */
static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
213 
/* Fill "uuid" (VK_UUID_SIZE bytes) with a device UUID derived from "info"
 * via the common AMD helper. */
static void
radv_get_device_uuid(const struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}
219 
/* Build the mapping from Vulkan queue family index to radv queue type and
 * count the exposed families. The assignment order below (general, compute,
 * video decode, transfer, sparse) defines the Vulkan queue family indices
 * reported by the driver. */
static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdevice)
{
   int idx = 0;
   /* Queue family 0 is always the general (GFX) queue. */
   pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   /* Initialize the remaining slots to an out-of-range sentinel
    * (RADV_MAX_QUEUE_FAMILIES + 1) meaning "no radv queue assigned". */
   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdevice->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   /* Compute family: needs a hardware compute queue and no debug kill switch. */
   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }

   /* Video decode family: opt-in via RADV_PERFTEST and needs a decode engine. */
   if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
      if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) {
         pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_DEC;
         idx++;
      }
   }

   if (radv_transfer_queue_enabled(pdevice)) {
      pdevice->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
      idx++;
   }

   /* The sparse-binding family is always exposed, last. */
   pdevice->vk_queue_to_radv[idx++] = RADV_QUEUE_SPARSE;

   pdevice->num_queues = idx;
}
252 
/* Bitmask of the Vulkan memory heaps this device advertises; populated in
 * radv_physical_device_init_mem_types (stored in device->heaps). */
enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,     /* device-local VRAM beyond the CPU-visible part */
   RADV_HEAP_GTT = 1 << 1,      /* GPU-accessible system memory (GART) */
   RADV_HEAP_VRAM_VIS = 1 << 2, /* CPU-visible VRAM */
   RADV_HEAP_MAX = 1 << 3,      /* one past the last valid heap bit */
};
259 
260 static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device * device)261 radv_get_adjusted_vram_size(struct radv_physical_device *device)
262 {
263    int ov = device->instance->drirc.override_vram_size;
264    if (ov >= 0)
265       return MIN2((uint64_t)device->rad_info.vram_size_kb * 1024, (uint64_t)ov << 20);
266    return (uint64_t)device->rad_info.vram_size_kb * 1024;
267 }
268 
269 static uint64_t
radv_get_visible_vram_size(struct radv_physical_device * device)270 radv_get_visible_vram_size(struct radv_physical_device *device)
271 {
272    return MIN2(radv_get_adjusted_vram_size(device), (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
273 }
274 
275 static uint64_t
radv_get_vram_size(struct radv_physical_device * device)276 radv_get_vram_size(struct radv_physical_device *device)
277 {
278    uint64_t total_size = radv_get_adjusted_vram_size(device);
279    return total_size - MIN2(total_size, (uint64_t)device->rad_info.vram_vis_size_kb * 1024);
280 }
281 
/* Build the Vulkan memory heaps and memory types advertised by this device,
 * together with the parallel radeon domain/flag tables consumed at
 * allocation time.
 *
 * Heaps (up to three): invisible VRAM, GTT and CPU-visible VRAM. On APUs
 * (no dedicated VRAM) the carveout is first either unified into one heap
 * or rebalanced 2/3 VRAM / 1/3 GTT. Types: for each heap a fixed set of
 * property-flag combinations, each with a 32-bit-address variant
 * (RADEON_FLAG_32BIT), and on chips with L2-uncached support an extra
 * device-coherent/uncached copy of the eligible types. */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = (uint64_t)device->rad_info.gart_size_kb * 1024;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      const uint64_t total_size = gtt_size + visible_vram_size;

      if (device->instance->drirc.enable_unified_heap_on_apu) {
         /* Some applications seem better when the driver exposes only one heap of VRAM on APUs. */
         visible_vram_size = total_size;
         gtt_size = 0;
      } else {
         /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
          * greater than it. To workaround this, we compute the total available memory size (GTT +
          * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
          */
         visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
         gtt_size = total_size - visible_vram_size;
      }

      /* APUs never expose an invisible-VRAM heap. */
      vram_size = 0;
   }

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   /* Device-local, CPU-inaccessible types (regular + 32-bit-address variant).
    * They live in the invisible-VRAM heap when present, else in visible VRAM. */
   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };

      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   /* Write-combined, host-visible GTT. */
   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }

   /* CPU-visible VRAM types (regular + 32-bit-address variant). */
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };

      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   /* Cached, host-visible GTT types (regular + 32-bit-address variant). */
   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };

      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

   if (device->rad_info.has_l2_uncached) {
      /* Duplicate the eligible (host-visible or plain device-local, non-32-bit)
       * types with device-coherent/uncached semantics for VK_AMD_device_coherent_memory. */
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
              mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
             !(device->memory_flags[i] & RADEON_FLAG_32BIT)) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }

   /* Record which type indices are restricted to the 32-bit address range. */
   for (unsigned i = 0; i < type_count; ++i) {
      if (device->memory_flags[i] & RADEON_FLAG_32BIT)
         device->memory_types_32bit |= BITFIELD_BIT(i);
   }
}
431 
432 uint32_t
radv_find_memory_index(const struct radv_physical_device * pdevice,VkMemoryPropertyFlags flags)433 radv_find_memory_index(const struct radv_physical_device *pdevice, VkMemoryPropertyFlags flags)
434 {
435    const VkPhysicalDeviceMemoryProperties *mem_properties = &pdevice->memory_properties;
436    for (uint32_t i = 0; i < mem_properties->memoryTypeCount; ++i) {
437       if (mem_properties->memoryTypes[i].propertyFlags == flags) {
438          return i;
439       }
440    }
441    unreachable("invalid memory properties");
442 }
443 
444 static void
radv_get_binning_settings(const struct radv_physical_device * pdevice,struct radv_binning_settings * settings)445 radv_get_binning_settings(const struct radv_physical_device *pdevice, struct radv_binning_settings *settings)
446 {
447    if ((pdevice->rad_info.has_dedicated_vram && pdevice->rad_info.max_render_backends > 4) ||
448        pdevice->rad_info.gfx_level >= GFX10) {
449       /* Using higher settings on GFX10+ can cause random GPU hangs. */
450       settings->context_states_per_bin = 1;
451       settings->persistent_states_per_bin = 1;
452    } else {
453       settings->context_states_per_bin = pdevice->rad_info.has_gfx9_scissor_bug ? 1 : 3;
454       settings->persistent_states_per_bin = 1;
455    }
456 
457    settings->fpovs_per_batch = 63;
458 }
459 
/* Fill "out_ext" with the table of device extensions this physical device
 * supports. Most entries are unconditionally true; the rest gate on GFX
 * level, compiler backend (LLVM vs ACO), debug/perftest flags, drirc
 * options or build-time configuration. */
static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *out_ext)
{
   const struct vk_device_extension_table ext = {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(device, false),
      .KHR_calibrated_timestamps = radv_calibrated_timestamps_enabled(device),
      .KHR_cooperative_matrix = device->rad_info.gfx_level >= GFX11 && !device->use_llvm,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shader_barycentric = device->rad_info.gfx_level >= GFX10_3,
      .KHR_fragment_shading_rate = device->rad_info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query = radv_perf_query_supported(device),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !device->use_llvm,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id = device->instance->drirc.enable_khr_present_wait ||
                        wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_present_wait = device->instance->drirc.enable_khr_present_wait ||
                          wsi_common_vk_instance_supports_present_wait(&device->instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(device, false),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(device, false),
      .KHR_ray_tracing_pipeline = radv_enable_rt(device, true),
      .KHR_ray_tracing_position_fetch = radv_enable_rt(device, false),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      /* Video decode is opt-in via RADV_PERFTEST and gated on codec build
       * support and VCN hardware version. */
      .KHR_video_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
      .KHR_video_decode_av1 =
         (device->rad_info.vcn_ip_version >= VCN_3_0_0 && device->rad_info.vcn_ip_version != VCN_3_0_33 &&
          VIDEO_CODEC_AV1DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE)),
      .KHR_video_decode_queue = !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && !!(device->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE),
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = device->rad_info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = radv_calibrated_timestamps_enabled(device),
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_fault = device->rad_info.has_gpuvm_fault_query,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_acquire_unmodified = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_fragment_shader_interlock = radv_has_pops(device),
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = !device->use_llvm && !(device->instance->debug_flags & RADV_DEBUG_NO_GPL),
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_compression_control = true,
      .EXT_image_drm_format_modifier = device->rad_info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = device->rad_info.gfx_level >= GFX10,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = device->rad_info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_mesh_shader = radv_taskmesh_enabled(device),
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true, /* Trivial promotion from VALVE. */
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = radv_enable_rt(device, true),
      .EXT_pipeline_robustness = !device->use_llvm,
      .EXT_post_depth_coverage = device->rad_info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = radv_shader_object_enabled(device),
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef RADV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm && !device->instance->drirc.enable_dgc,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_early_and_late_fragment_tests = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = device->use_fmask,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = device->rad_info.gfx_level < GFX11,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .NV_compute_shader_derivatives = true,
      .NV_device_generated_commands = device->instance->drirc.enable_dgc,
      .NV_device_generated_commands_compute = device->instance->drirc.enable_dgc,
      /* Undocumented extension purely for vkd3d-proton. This check is to prevent anyone else from
       * using it.
       */
      .VALVE_descriptor_set_host_mapping =
         device->vk.instance->app_info.engine_name && strcmp(device->vk.instance->app_info.engine_name, "vkd3d") == 0,
      .VALVE_mutable_descriptor_type = true,
   };
   *out_ext = ext;
}
693 
/* Report every Vulkan feature (core 1.0-1.3 plus extension features) that this
 * physical device supports by filling in *features. Flags that depend on the
 * chip are gated on pdevice->rad_info (gfx_level / family / capability bits);
 * flags that depend on the compiler backend are gated on pdevice->use_llvm.
 */
static void
radv_physical_device_get_features(const struct radv_physical_device *pdevice, struct vk_features *features)
{
   /* Task/mesh shading availability (queried once, used for several flags below). */
   bool taskmesh_en = radv_taskmesh_enabled(pdevice);
   bool has_perf_query = radv_perf_query_supported(pdevice);
   /* Image float32 min/max atomics are excluded on GFX8, GFX9 and GFX11 —
    * presumably a hardware limitation on those generations; TODO confirm. */
   bool has_shader_image_float_minmax = pdevice->rad_info.gfx_level != GFX8 && pdevice->rad_info.gfx_level != GFX9 &&
                                        pdevice->rad_info.gfx_level != GFX11;
   /* Fragment shader interlock is backed by POPS support (radv_has_pops). */
   bool has_fragment_shader_interlock = radv_has_pops(pdevice);

   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice) || pdevice->emulate_etc2,
      .textureCompressionASTC_LDR = pdevice->emulate_astc,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      /* Sparse residency requires Polaris10 or newer. */
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage3D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
      .storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit,
      .multiview = true,
      .multiviewGeometryShader = true,
      .multiviewTessellationShader = true,
      .variablePointersStorageBuffer = true,
      .variablePointers = true,
      .protectedMemory = false,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = true,

      /* Vulkan 1.2 */
      .samplerMirrorClampToEdge = true,
      .drawIndirectCount = true,
      .storageBuffer8BitAccess = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .shaderBufferInt64Atomics = true,
      .shaderSharedInt64Atomics = true,
      .shaderFloat16 = pdevice->rad_info.has_packed_math_16bit,
      .shaderInt8 = true,

      .descriptorIndexing = true,
      .shaderInputAttachmentArrayDynamicIndexing = true,
      .shaderUniformTexelBufferArrayDynamicIndexing = true,
      .shaderStorageTexelBufferArrayDynamicIndexing = true,
      .shaderUniformBufferArrayNonUniformIndexing = true,
      .shaderSampledImageArrayNonUniformIndexing = true,
      .shaderStorageBufferArrayNonUniformIndexing = true,
      .shaderStorageImageArrayNonUniformIndexing = true,
      .shaderInputAttachmentArrayNonUniformIndexing = true,
      .shaderUniformTexelBufferArrayNonUniformIndexing = true,
      .shaderStorageTexelBufferArrayNonUniformIndexing = true,
      .descriptorBindingUniformBufferUpdateAfterBind = true,
      .descriptorBindingSampledImageUpdateAfterBind = true,
      .descriptorBindingStorageImageUpdateAfterBind = true,
      .descriptorBindingStorageBufferUpdateAfterBind = true,
      .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
      .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
      .descriptorBindingUpdateUnusedWhilePending = true,
      .descriptorBindingPartiallyBound = true,
      .descriptorBindingVariableDescriptorCount = true,
      .runtimeDescriptorArray = true,

      .samplerFilterMinmax = true,
      .scalarBlockLayout = pdevice->rad_info.gfx_level >= GFX7,
      .imagelessFramebuffer = true,
      .uniformBufferStandardLayout = true,
      .shaderSubgroupExtendedTypes = true,
      .separateDepthStencilLayouts = true,
      .hostQueryReset = true,
      .timelineSemaphore = true,
      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = true,
      .bufferDeviceAddressMultiDevice = false,
      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = false,
      .shaderOutputViewportIndex = true,
      .shaderOutputLayer = true,
      .subgroupBroadcastDynamicId = true,

      /* Vulkan 1.3 */
      .robustImageAccess = true,
      .inlineUniformBlock = true,
      .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .shaderDemoteToHelperInvocation = true,
      .shaderTerminateInvocation = true,
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,
      .synchronization2 = true,
      .textureCompressionASTC_HDR = false,
      .shaderZeroInitializeWorkgroupMemory = true,
      .dynamicRendering = true,
      .shaderIntegerDotProduct = true,
      .maintenance4 = true,

      /* VK_EXT_conditional_rendering */
      .conditionalRendering = true,
      .inheritedConditionalRendering = false,

      /* VK_KHR_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = true,

      /* VK_EXT_transform_feedback */
      .transformFeedback = true,
      .geometryStreams = true,

      /* VK_EXT_memory_priority */
      .memoryPriority = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = true,

      /* VK_NV_compute_shader_derivatives */
      .computeDerivativeGroupQuads = false,
      .computeDerivativeGroupLinear = true,

      /* VK_EXT_ycbcr_image_arrays */
      .ycbcrImageArrays = true,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = pdevice->rad_info.gfx_level >= GFX8,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_KHR_shader_clock */
      .shaderSubgroupClock = true,
      .shaderDeviceClock = pdevice->rad_info.gfx_level >= GFX8,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_AMD_device_coherent_memory */
      .deviceCoherentMemory = pdevice->rad_info.has_l2_uncached,

      /* VK_KHR_line_rasterization */
      .rectangularLines = true,
      .bresenhamLines = true,
      .smoothLines = true,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = true,
      .stippledSmoothLines = false,

      /* VK_EXT_robustness2 */
      .robustBufferAccess2 = true,
      .robustImageAccess2 = true,
      .nullDescriptor = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_shader_atomic_float */
      .shaderBufferFloat32Atomics = true,
      .shaderBufferFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX11,
      .shaderBufferFloat64Atomics = true,
      .shaderBufferFloat64AtomicAdd = false,
      .shaderSharedFloat32Atomics = true,
      .shaderSharedFloat32AtomicAdd = pdevice->rad_info.gfx_level >= GFX8,
      .shaderSharedFloat64Atomics = true,
      .shaderSharedFloat64AtomicAdd = false,
      .shaderImageFloat32Atomics = true,
      .shaderImageFloat32AtomicAdd = false,
      .sparseImageFloat32Atomics = true,
      .sparseImageFloat32AtomicAdd = false,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,

      /* VK_EXT_shader_image_atomic_int64 */
      .shaderImageInt64Atomics = true,
      .sparseImageInt64Atomics = true,

      /* VK_EXT_mutable_descriptor_type */
      .mutableDescriptorType = true,

      /* VK_KHR_fragment_shading_rate */
      .pipelineFragmentShadingRate = true,
      .primitiveFragmentShadingRate = true,
      .attachmentFragmentShadingRate = radv_vrs_attachment_enabled(pdevice),

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      .transformFeedbackPreservesProvokingVertex = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      .extendedDynamicState2LogicOp = true,
      .extendedDynamicState2PatchControlPoints = true,

      /* VK_EXT_global_priority_query */
      .globalPriorityQuery = true,

      /* VK_KHR_acceleration_structure */
      .accelerationStructure = true,
      .accelerationStructureCaptureReplay = true,
      .accelerationStructureIndirectBuild = false,
      .accelerationStructureHostCommands = false,
      .descriptorBindingAccelerationStructureUpdateAfterBind = true,

      /* VK_EXT_buffer_device_address */
      .bufferDeviceAddressCaptureReplayEXT = true,

      /* VK_KHR_shader_subgroup_uniform_control_flow */
      .shaderSubgroupUniformControlFlow = true,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_shader_atomic_float2 */
      .shaderBufferFloat16Atomics = false,
      .shaderBufferFloat16AtomicAdd = false,
      .shaderBufferFloat16AtomicMinMax = false,
      .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 32),
      .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdevice, 64),
      .shaderSharedFloat16Atomics = false,
      .shaderSharedFloat16AtomicAdd = false,
      .shaderSharedFloat16AtomicMinMax = false,
      .shaderSharedFloat32AtomicMinMax = true,
      .shaderSharedFloat64AtomicMinMax = true,
      .shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax,
      .sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax,

      /* VK_KHR_present_id */
      .presentId = pdevice->vk.supported_extensions.KHR_present_id,

      /* VK_KHR_present_wait */
      .presentWait = pdevice->vk.supported_extensions.KHR_present_wait,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      .primitiveTopologyPatchListRestart = false,

      /* VK_KHR_ray_query */
      .rayQuery = true,

      /* VK_EXT_pipeline_library_group_handles */
      .pipelineLibraryGroupHandles = true,

      /* VK_KHR_ray_tracing_pipeline */
      .rayTracingPipeline = true,
      .rayTracingPipelineShaderGroupHandleCaptureReplay = true,
      .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
      .rayTracingPipelineTraceRaysIndirect = true,
      .rayTraversalPrimitiveCulling = true,

      /* VK_KHR_ray_tracing_maintenance1 */
      .rayTracingMaintenance1 = true,
      .rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdevice, true),

      /* VK_KHR_ray_tracing_position_fetch */
      .rayTracingPositionFetch = true,

      /* VK_EXT_vertex_input_dynamic_state */
      .vertexInputDynamicState = true,

      /* VK_EXT_image_view_min_lod */
      .minLod = true,

      /* VK_EXT_mesh_shader */
      .meshShader = taskmesh_en,
      .taskShader = taskmesh_en,
      .multiviewMeshShader = taskmesh_en,
      .primitiveFragmentShadingRateMeshShader = taskmesh_en,
      .meshShaderQueries = false,

      /* VK_VALVE_descriptor_set_host_mapping */
      .descriptorSetHostMapping = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_EXT_image_2d_view_of_3d  */
      .image2DViewOf3D = true,
      .sampler2DViewOf3D = false,

      /* VK_INTEL_shader_integer_functions2 */
      .shaderIntegerFunctions2 = true,

      /* VK_EXT_primitives_generated_query */
      .primitivesGeneratedQuery = true,
      .primitivesGeneratedQueryWithRasterizerDiscard = true,
      .primitivesGeneratedQueryWithNonZeroStreams = true,

      /* VK_EXT_non_seamless_cube_map */
      .nonSeamlessCubeMap = true,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = has_perf_query,
      .performanceCounterMultipleQueryPools = has_perf_query,

      /* VK_NV_device_generated_commands */
      .deviceGeneratedCommands = true,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_graphics_pipeline_library */
      .graphicsPipelineLibrary = true,

      /* VK_EXT_extended_dynamic_state3
       * Several of these are disabled with LLVM — presumably the LLVM backend
       * cannot compile the required shader variants; TODO confirm. */
      .extendedDynamicState3TessellationDomainOrigin = true,
      .extendedDynamicState3PolygonMode = true,
      .extendedDynamicState3SampleMask = true,
      .extendedDynamicState3AlphaToCoverageEnable = !pdevice->use_llvm,
      .extendedDynamicState3LogicOpEnable = true,
      .extendedDynamicState3LineStippleEnable = true,
      .extendedDynamicState3ColorBlendEnable = !pdevice->use_llvm,
      .extendedDynamicState3DepthClipEnable = true,
      .extendedDynamicState3ConservativeRasterizationMode = pdevice->rad_info.gfx_level >= GFX9,
      .extendedDynamicState3DepthClipNegativeOneToOne = true,
      .extendedDynamicState3ProvokingVertexMode = true,
      .extendedDynamicState3DepthClampEnable = true,
      .extendedDynamicState3ColorWriteMask = !pdevice->use_llvm,
      .extendedDynamicState3RasterizationSamples = true,
      .extendedDynamicState3ColorBlendEquation = !pdevice->use_llvm,
      .extendedDynamicState3SampleLocationsEnable = pdevice->rad_info.gfx_level < GFX10,
      .extendedDynamicState3LineRasterizationMode = true,
      .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
      .extendedDynamicState3AlphaToOneEnable = false,
      .extendedDynamicState3RasterizationStream = false,
      .extendedDynamicState3ColorBlendAdvanced = false,
      .extendedDynamicState3ViewportWScalingEnable = false,
      .extendedDynamicState3ViewportSwizzle = false,
      .extendedDynamicState3CoverageToColorEnable = false,
      .extendedDynamicState3CoverageToColorLocation = false,
      .extendedDynamicState3CoverageModulationMode = false,
      .extendedDynamicState3CoverageModulationTableEnable = false,
      .extendedDynamicState3CoverageModulationTable = false,
      .extendedDynamicState3CoverageReductionMode = false,
      .extendedDynamicState3RepresentativeFragmentTestEnable = false,
      .extendedDynamicState3ShadingRateImageEnable = false,

      /* VK_EXT_descriptor_buffer */
      .descriptorBuffer = true,
      .descriptorBufferCaptureReplay = false,
      .descriptorBufferImageLayoutIgnored = true,
      .descriptorBufferPushDescriptors = true,

      /* VK_AMD_shader_early_and_late_fragment_tests */
      .shaderEarlyAndLateFragmentTests = true,

      /* VK_EXT_image_sliced_view_of_3d */
      .imageSlicedViewOf3D = true,

#ifdef RADV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_EXT_attachment_feedback_loop_dynamic_state */
      .attachmentFeedbackLoopDynamicState = true,

      /* VK_EXT_dynamic_rendering_unused_attachments */
      .dynamicRenderingUnusedAttachments = true,

      /* VK_KHR_fragment_shader_barycentric */
      .fragmentShaderBarycentric = true,

      /* VK_EXT_depth_bias_control */
      .depthBiasControl = true,
      .leastRepresentableValueForceUnormRepresentation = true,
      .floatRepresentation = true,
      .depthBiasExact = true,

      /* VK_EXT_fragment_shader_interlock */
      .fragmentShaderSampleInterlock = has_fragment_shader_interlock,
      .fragmentShaderPixelInterlock = has_fragment_shader_interlock,
      .fragmentShaderShadingRateInterlock = false,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

      /* VK_NV_device_generated_commands_compute */
      .deviceGeneratedCompute = true,
      .deviceGeneratedComputePipelines = false,
      .deviceGeneratedComputeCaptureReplay = false,

      /* VK_KHR_cooperative_matrix (requires GFX11+ WMMA and the ACO backend) */
      .cooperativeMatrix = pdevice->rad_info.gfx_level >= GFX11 && !pdevice->use_llvm,
      .cooperativeMatrixRobustBufferAccess = pdevice->rad_info.gfx_level >= GFX11 && !pdevice->use_llvm,

      /* VK_EXT_image_compression_control */
      .imageCompressionControl = true,

      /* VK_EXT_device_fault */
      .deviceFault = true,
      .deviceFaultVendorBinary = pdevice->instance->debug_flags & RADV_DEBUG_HANG,

      /* VK_EXT_depth_clamp_zero_one */
      .depthClampZeroOne = true,

      /* VK_KHR_maintenance6 */
      .maintenance6 = true,

      /* VK_KHR_shader_subgroup_rotate */
      .shaderSubgroupRotate = true,
      .shaderSubgroupRotateClustered = true,

      /* VK_EXT_shader_object */
      .shaderObject = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_shader_maximal_reconvergence */
      .shaderMaximalReconvergence = true,

      /* VK_KHR_shader_quad_control */
      .shaderQuadControl = true,
   };
}
1180 
1181 static size_t
radv_max_descriptor_set_size()1182 radv_max_descriptor_set_size()
1183 {
1184    /* make sure that the entire descriptor set is addressable with a signed
1185     * 32-bit int. So the sum of all limits scaled by descriptor size has to
1186     * be at most 2 GiB. the combined image & samples object count as one of
1187     * both. This limit is for the pipeline layout, not for the set layout, but
1188     * there is no set limit, so we just set a pipeline limit. I don't think
1189     * any app is going to hit this soon. */
1190    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1191           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1192            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1193            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
1194 }
1195 
1196 static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device * pdevice)1197 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1198 {
1199    uint32_t uniform_offset_alignment = pdevice->instance->drirc.override_uniform_offset_alignment;
1200    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1201       fprintf(stderr,
1202               "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1203               "not a power of two\n",
1204               uniform_offset_alignment);
1205       uniform_offset_alignment = 0;
1206    }
1207 
1208    /* Take at least the hardware limit. */
1209    return MAX2(uniform_offset_alignment, 4);
1210 }
1211 
1212 static const char *
radv_get_compiler_string(struct radv_physical_device * pdevice)1213 radv_get_compiler_string(struct radv_physical_device *pdevice)
1214 {
1215    if (!pdevice->use_llvm) {
1216       /* Some games like SotTR apply shader workarounds if the LLVM
1217        * version is too old or if the LLVM version string is
1218        * missing. This gives 2-5% performance with SotTR and ACO.
1219        */
1220       if (pdevice->instance->drirc.report_llvm9_version_string) {
1221          return " (LLVM 9.0.1)";
1222       }
1223 
1224       return "";
1225    }
1226 
1227 #if LLVM_AVAILABLE
1228    return " (LLVM " MESA_LLVM_VERSION_STRING ")";
1229 #else
1230    unreachable("LLVM is not available");
1231 #endif
1232 }
1233 
1234 static void
radv_get_physical_device_properties(struct radv_physical_device * pdevice)1235 radv_get_physical_device_properties(struct radv_physical_device *pdevice)
1236 {
1237    VkSampleCountFlags sample_counts = 0xf;
1238 
1239    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1240 
1241    VkPhysicalDeviceType device_type;
1242    if (pdevice->rad_info.has_dedicated_vram) {
1243       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1244    } else {
1245       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1246    }
1247 
1248    pdevice->vk.properties = (struct vk_properties){
1249       .apiVersion = RADV_API_VERSION,
1250       .driverVersion = vk_get_driver_version(),
1251       .vendorID = ATI_VENDOR_ID,
1252       .deviceID = pdevice->rad_info.pci_id,
1253       .deviceType = device_type,
1254       .maxImageDimension1D = (1 << 14),
1255       .maxImageDimension2D = (1 << 14),
1256       .maxImageDimension3D = (1 << 11),
1257       .maxImageDimensionCube = (1 << 14),
1258       .maxImageArrayLayers = (1 << 11),
1259       .maxTexelBufferElements = UINT32_MAX,
1260       .maxUniformBufferRange = UINT32_MAX,
1261       .maxStorageBufferRange = UINT32_MAX,
1262       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1263       .maxMemoryAllocationCount = UINT32_MAX,
1264       .maxSamplerAllocationCount = 64 * 1024,
1265       .bufferImageGranularity = 1,
1266       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1267       .maxBoundDescriptorSets = MAX_SETS,
1268       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1269       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1270       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1271       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1272       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1273       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1274       .maxPerStageResources = max_descriptor_set_size,
1275       .maxDescriptorSetSamplers = max_descriptor_set_size,
1276       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1277       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1278       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1279       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1280       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1281       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1282       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1283       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1284       .maxVertexInputBindings = MAX_VBS,
1285       .maxVertexInputAttributeOffset = UINT32_MAX,
1286       .maxVertexInputBindingStride = 2048,
1287       .maxVertexOutputComponents = 128,
1288       .maxTessellationGenerationLevel = 64,
1289       .maxTessellationPatchSize = 32,
1290       .maxTessellationControlPerVertexInputComponents = 128,
1291       .maxTessellationControlPerVertexOutputComponents = 128,
1292       .maxTessellationControlPerPatchOutputComponents = 120,
1293       .maxTessellationControlTotalOutputComponents = 4096,
1294       .maxTessellationEvaluationInputComponents = 128,
1295       .maxTessellationEvaluationOutputComponents = 128,
1296       .maxGeometryShaderInvocations = 127,
1297       .maxGeometryInputComponents = 64,
1298       .maxGeometryOutputComponents = 128,
1299       .maxGeometryOutputVertices = 256,
1300       .maxGeometryTotalOutputComponents = 1024,
1301       .maxFragmentInputComponents = 128,
1302       .maxFragmentOutputAttachments = 8,
1303       .maxFragmentDualSrcAttachments = 1,
1304       .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1305       .maxComputeSharedMemorySize = pdevice->max_shared_size,
1306       .maxComputeWorkGroupCount = {65535, 65535, 65535},
1307       .maxComputeWorkGroupInvocations = 1024,
1308       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1309       .subPixelPrecisionBits = 8,
1310       .subTexelPrecisionBits = 8,
1311       .mipmapPrecisionBits = 8,
1312       .maxDrawIndexedIndexValue = UINT32_MAX,
1313       .maxDrawIndirectCount = UINT32_MAX,
1314       .maxSamplerLodBias = 16,
1315       .maxSamplerAnisotropy = 16,
1316       .maxViewports = MAX_VIEWPORTS,
1317       .maxViewportDimensions = {(1 << 14), (1 << 14)},
1318       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1319       .viewportSubPixelBits = 8,
1320       .minMemoryMapAlignment = 4096, /* A page */
1321       .minTexelBufferOffsetAlignment = 4,
1322       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
1323       .minStorageBufferOffsetAlignment = 4,
1324       .minTexelOffset = -32,
1325       .maxTexelOffset = 31,
1326       .minTexelGatherOffset = -32,
1327       .maxTexelGatherOffset = 31,
1328       .minInterpolationOffset = -2,
1329       .maxInterpolationOffset = 2,
1330       .subPixelInterpolationOffsetBits = 8,
1331       .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1332       .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1333       .maxFramebufferLayers = (1 << 10),
1334       .framebufferColorSampleCounts = sample_counts,
1335       .framebufferDepthSampleCounts = sample_counts,
1336       .framebufferStencilSampleCounts = sample_counts,
1337       .framebufferNoAttachmentsSampleCounts = sample_counts,
1338       .maxColorAttachments = MAX_RTS,
1339       .sampledImageColorSampleCounts = sample_counts,
1340       .sampledImageIntegerSampleCounts = sample_counts,
1341       .sampledImageDepthSampleCounts = sample_counts,
1342       .sampledImageStencilSampleCounts = sample_counts,
1343       .storageImageSampleCounts = sample_counts,
1344       .maxSampleMaskWords = 1,
1345       .timestampComputeAndGraphics = true,
1346       .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1347       .maxClipDistances = 8,
1348       .maxCullDistances = 8,
1349       .maxCombinedClipAndCullDistances = 8,
1350       .discreteQueuePriorities = 2,
1351       .pointSizeRange = {0.0, 8191.875},
1352       .lineWidthRange = {0.0, 8.0},
1353       .pointSizeGranularity = (1.0 / 8.0),
1354       .lineWidthGranularity = (1.0 / 8.0),
1355       .strictLines = false, /* FINISHME */
1356       .standardSampleLocations = true,
1357       .optimalBufferCopyOffsetAlignment = 1,
1358       .optimalBufferCopyRowPitchAlignment = 1,
1359       .nonCoherentAtomSize = 64,
1360       .sparseResidencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
1361       .sparseResidencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
1362       .sparseResidencyStandard3DBlockShape = pdevice->rad_info.gfx_level >= GFX9,
1363    };
1364 
1365    struct vk_properties *p = &pdevice->vk.properties;
1366 
1367    /* Vulkan 1.1 */
1368    strcpy(p->deviceName, pdevice->marketing_name);
1369    memcpy(p->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1370 
1371    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1372    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1373    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1374    /* The LUID is for Windows. */
1375    p->deviceLUIDValid = false;
1376    p->deviceNodeMask = 0;
1377 
1378    p->subgroupSize = RADV_SUBGROUP_SIZE;
1379    p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1380    if (radv_taskmesh_enabled(pdevice))
1381       p->subgroupSupportedStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1382 
1383    if (radv_enable_rt(pdevice, true))
1384       p->subgroupSupportedStages |= RADV_RT_STAGE_BITS;
1385    p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1386                                     VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1387                                     VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1388                                     VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1389                                     VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR;
1390    p->subgroupQuadOperationsInAllStages = true;
1391 
1392    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1393    p->maxMultiviewViewCount = MAX_VIEWS;
1394    p->maxMultiviewInstanceIndex = INT_MAX;
1395    p->protectedNoFault = false;
1396    p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1397    p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1398 
1399    /* Vulkan 1.2 */
1400    p->driverID = VK_DRIVER_ID_MESA_RADV;
1401    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1402    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1403             radv_get_compiler_string(pdevice));
1404 
1405    if (radv_is_conformant(pdevice)) {
1406       if (pdevice->rad_info.gfx_level >= GFX10_3) {
1407          p->conformanceVersion = (VkConformanceVersion){
1408             .major = 1,
1409             .minor = 3,
1410             .subminor = 0,
1411             .patch = 0,
1412          };
1413       } else {
1414          p->conformanceVersion = (VkConformanceVersion){
1415             .major = 1,
1416             .minor = 2,
1417             .subminor = 7,
1418             .patch = 1,
1419          };
1420       }
1421    } else {
1422       p->conformanceVersion = (VkConformanceVersion){
1423          .major = 0,
1424          .minor = 0,
1425          .subminor = 0,
1426          .patch = 0,
1427       };
1428    }
1429 
1430    /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1431     * controlled by the same config register.
1432     */
1433    if (pdevice->rad_info.has_packed_math_16bit) {
1434       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1435       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY;
1436    } else {
1437       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1438       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
1439    }
1440 
1441    /* With LLVM, do not allow both preserving and flushing denorms because
1442     * different shaders in the same pipeline can have different settings and
1443     * this won't work for merged shaders. To make it work, this requires LLVM
1444     * support for changing the register. The same logic applies for the
1445     * rounding modes because they are configured with the same config
1446     * register.
1447     */
1448    p->shaderDenormFlushToZeroFloat32 = true;
1449    p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
1450    p->shaderRoundingModeRTEFloat32 = true;
1451    p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
1452    p->shaderSignedZeroInfNanPreserveFloat32 = true;
1453 
1454    p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1455    p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1456    p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
1457    p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1458    p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1459 
1460    p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
1461    p->shaderDenormPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
1462    p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.gfx_level >= GFX8;
1463    p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.gfx_level >= GFX8 && !pdevice->use_llvm;
1464    p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.gfx_level >= GFX8;
1465 
1466    p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1467    p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1468    p->shaderSampledImageArrayNonUniformIndexingNative = false;
1469    p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1470    p->shaderStorageImageArrayNonUniformIndexingNative = false;
1471    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1472    p->robustBufferAccessUpdateAfterBind = true;
1473    p->quadDivergentImplicitLod = false;
1474 
1475    p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1476    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1477    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1478    p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1479    p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1480    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1481    p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1482    p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1483    p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1484    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1485    p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1486    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1487    p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1488    p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1489    p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1490 
1491    /* We support all of the depth resolve modes */
1492    p->supportedDepthResolveModes =
1493       VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1494 
1495    /* Average doesn't make sense for stencil so we don't support that */
1496    p->supportedStencilResolveModes =
1497       VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT;
1498 
1499    p->independentResolveNone = true;
1500    p->independentResolve = true;
1501 
1502    /* GFX6-8 only support single channel min/max filter. */
1503    p->filterMinmaxImageComponentMapping = pdevice->rad_info.gfx_level >= GFX9;
1504    p->filterMinmaxSingleComponentFormats = true;
1505 
1506    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1507 
1508    p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1509 
1510    /* Vulkan 1.3 */
1511    p->minSubgroupSize = 64;
1512    p->maxSubgroupSize = 64;
1513    p->maxComputeWorkgroupSubgroups = UINT32_MAX;
1514    p->requiredSubgroupSizeStages = 0;
1515    if (pdevice->rad_info.gfx_level >= GFX10) {
1516       /* Only GFX10+ supports wave32. */
1517       p->minSubgroupSize = 32;
1518       p->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
1519 
1520       if (radv_taskmesh_enabled(pdevice)) {
1521          p->requiredSubgroupSizeStages |= VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT;
1522       }
1523    }
1524 
1525    p->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
1526    p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1527    p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
1528    p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1529    p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
1530    p->maxInlineUniformTotalSize = UINT16_MAX;
1531 
1532    bool accel_dot = pdevice->rad_info.has_accelerated_dot_product;
1533    bool gfx11plus = pdevice->rad_info.gfx_level >= GFX11;
1534    p->integerDotProduct8BitUnsignedAccelerated = accel_dot;
1535    p->integerDotProduct8BitSignedAccelerated = accel_dot;
1536    p->integerDotProduct8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1537    p->integerDotProduct4x8BitPackedUnsignedAccelerated = accel_dot;
1538    p->integerDotProduct4x8BitPackedSignedAccelerated = accel_dot;
1539    p->integerDotProduct4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1540    p->integerDotProduct16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1541    p->integerDotProduct16BitSignedAccelerated = accel_dot && !gfx11plus;
1542    p->integerDotProduct16BitMixedSignednessAccelerated = false;
1543    p->integerDotProduct32BitUnsignedAccelerated = false;
1544    p->integerDotProduct32BitSignedAccelerated = false;
1545    p->integerDotProduct32BitMixedSignednessAccelerated = false;
1546    p->integerDotProduct64BitUnsignedAccelerated = false;
1547    p->integerDotProduct64BitSignedAccelerated = false;
1548    p->integerDotProduct64BitMixedSignednessAccelerated = false;
1549    p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel_dot;
1550    p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel_dot;
1551    p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel_dot && gfx11plus;
1552    p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel_dot;
1553    p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel_dot;
1554    p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus;
1555    p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel_dot && !gfx11plus;
1556    p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel_dot && !gfx11plus;
1557    p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
1558    p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
1559    p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
1560    p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
1561    p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
1562    p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
1563    p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
1564 
1565    p->storageTexelBufferOffsetAlignmentBytes = 4;
1566    p->storageTexelBufferOffsetSingleTexelAlignment = true;
1567    p->uniformTexelBufferOffsetAlignmentBytes = 4;
1568    p->uniformTexelBufferOffsetSingleTexelAlignment = true;
1569 
1570    p->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1571 
1572    /* VK_KHR_push_descriptor */
1573    p->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1574 
1575    /* VK_EXT_discard_rectangles */
1576    p->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1577 
1578    /* VK_EXT_external_memory_host */
1579    p->minImportedHostPointerAlignment = 4096;
1580 
1581    /* VK_AMD_shader_core_properties */
1582    /* Shader engines. */
1583    p->shaderEngineCount = pdevice->rad_info.max_se;
1584    p->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
1585    p->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
1586    p->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
1587    p->wavefrontsPerSimd = pdevice->rad_info.max_waves_per_simd;
1588    p->wavefrontSize = 64;
1589 
1590    /* SGPR. */
1591    p->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
1592    p->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
1593    p->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
1594    p->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
1595 
1596    /* VGPR. */
1597    p->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
1598    p->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
1599    p->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
1600    p->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
1601 
1602    /* VK_AMD_shader_core_properties2 */
1603    p->shaderCoreFeatures = 0;
1604    p->activeComputeUnitCount = pdevice->rad_info.num_cu;
1605 
1606    /* VK_KHR_vertex_attribute_divisor */
1607    p->maxVertexAttribDivisor = UINT32_MAX;
1608    p->supportsNonZeroFirstInstance = true;
1609 
1610    /* VK_EXT_conservative_rasterization */
1611    p->primitiveOverestimationSize = 0;
1612    p->maxExtraPrimitiveOverestimationSize = 0;
1613    p->extraPrimitiveOverestimationSizeGranularity = 0;
1614    p->primitiveUnderestimation = true;
1615    p->conservativePointAndLineRasterization = false;
1616    p->degenerateTrianglesRasterized = true;
1617    p->degenerateLinesRasterized = false;
1618    p->fullyCoveredFragmentShaderInputVariable = true;
1619    p->conservativeRasterizationPostDepthCoverage = false;
1620 
1621    /* VK_EXT_pci_bus_info */
1622 #ifndef _WIN32
1623    p->pciDomain = pdevice->bus_info.domain;
1624    p->pciBus = pdevice->bus_info.bus;
1625    p->pciDevice = pdevice->bus_info.dev;
1626    p->pciFunction = pdevice->bus_info.func;
1627 #endif
1628 
1629    /* VK_EXT_transform_feedback */
1630    p->maxTransformFeedbackStreams = MAX_SO_STREAMS;
1631    p->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
1632    p->maxTransformFeedbackBufferSize = UINT32_MAX;
1633    p->maxTransformFeedbackStreamDataSize = 512;
1634    p->maxTransformFeedbackBufferDataSize = 512;
1635    p->maxTransformFeedbackBufferDataStride = 512;
1636    p->transformFeedbackQueries = true;
1637    p->transformFeedbackStreamsLinesTriangles = true;
1638    p->transformFeedbackRasterizationStreamSelect = false;
1639    p->transformFeedbackDraw = true;
1640 
1641    /* VK_EXT_sample_locations */
1642    p->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
1643    p->maxSampleLocationGridSize = (VkExtent2D){2, 2};
1644    p->sampleLocationCoordinateRange[0] = 0.0f;
1645    p->sampleLocationCoordinateRange[1] = 0.9375f;
1646    p->sampleLocationSubPixelBits = 4;
1647    p->variableSampleLocations = false;
1648 
1649    /* VK_KHR_line_rasterization */
1650    p->lineSubPixelPrecisionBits = 4;
1651 
1652    /* VK_EXT_robustness2 */
1653    p->robustStorageBufferAccessSizeAlignment = 4;
1654    p->robustUniformBufferAccessSizeAlignment = 4;
1655 
1656    /* VK_EXT_custom_border_color */
1657    p->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
1658 
1659    /* VK_KHR_fragment_shading_rate */
1660    if (radv_vrs_attachment_enabled(pdevice)) {
1661       p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1662       p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
1663    } else {
1664       p->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1665       p->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){0, 0};
1666    }
1667    p->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1668    p->primitiveFragmentShadingRateWithMultipleViewports = true;
1669    p->layeredShadingRateAttachments = false; /* TODO */
1670    p->fragmentShadingRateNonTrivialCombinerOps = true;
1671    p->maxFragmentSize = (VkExtent2D){2, 2};
1672    p->maxFragmentSizeAspectRatio = 2;
1673    p->maxFragmentShadingRateCoverageSamples = 32;
1674    p->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
1675    p->fragmentShadingRateWithShaderDepthStencilWrites = !pdevice->rad_info.has_vrs_ds_export_bug;
1676    p->fragmentShadingRateWithSampleMask = true;
1677    p->fragmentShadingRateWithShaderSampleMask = false;
1678    p->fragmentShadingRateWithConservativeRasterization = true;
1679    p->fragmentShadingRateWithFragmentShaderInterlock = pdevice->rad_info.gfx_level >= GFX11 && radv_has_pops(pdevice);
1680    p->fragmentShadingRateWithCustomSampleLocations = false;
1681    p->fragmentShadingRateStrictMultiplyCombiner = true;
1682 
1683    /* VK_EXT_provoking_vertex */
1684    p->provokingVertexModePerPipeline = true;
1685    p->transformFeedbackPreservesTriangleFanProvokingVertex = true;
1686 
1687    /* VK_KHR_acceleration_structure */
1688    p->maxGeometryCount = (1 << 24) - 1;
1689    p->maxInstanceCount = (1 << 24) - 1;
1690    p->maxPrimitiveCount = (1 << 29) - 1;
1691    p->maxPerStageDescriptorAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1692    p->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = p->maxPerStageDescriptorStorageBuffers;
1693    p->maxDescriptorSetAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1694    p->maxDescriptorSetUpdateAfterBindAccelerationStructures = p->maxDescriptorSetStorageBuffers;
1695    p->minAccelerationStructureScratchOffsetAlignment = 128;
1696 
1697    /* VK_EXT_physical_device_drm */
1698 #ifndef _WIN32
1699    if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
1700       p->drmHasPrimary = true;
1701       p->drmPrimaryMajor = (int64_t)major(pdevice->primary_devid);
1702       p->drmPrimaryMinor = (int64_t)minor(pdevice->primary_devid);
1703    } else {
1704       p->drmHasPrimary = false;
1705    }
1706    if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
1707       p->drmHasRender = true;
1708       p->drmRenderMajor = (int64_t)major(pdevice->render_devid);
1709       p->drmRenderMinor = (int64_t)minor(pdevice->render_devid);
1710    } else {
1711       p->drmHasRender = false;
1712    }
1713 #endif
1714 
1715    /* VK_EXT_multi_draw */
1716    p->maxMultiDrawCount = 2048;
1717 
1718    /* VK_KHR_ray_tracing_pipeline */
1719 
1720    p->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
1721    p->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
1722    p->maxShaderGroupStride = 16384; /* dummy */
1723    /* This isn't strictly necessary, but Doom Eternal breaks if the
1724     * alignment is any lower. */
1725    p->shaderGroupBaseAlignment = RADV_RT_HANDLE_SIZE;
1726    p->shaderGroupHandleCaptureReplaySize = sizeof(struct radv_rt_capture_replay_handle);
1727    p->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
1728    p->shaderGroupHandleAlignment = 16;
1729    p->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
1730 
1731    /* VK_EXT_shader_module_identifier */
1732    STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1733    memcpy(p->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
1734           sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1735 
1736    /* VK_KHR_performance_query */
1737    p->allowCommandBufferQueryCopies = false;
1738 
1739    /* VK_NV_device_generated_commands */
1740    p->maxIndirectCommandsStreamCount = 1;
1741    p->maxIndirectCommandsStreamStride = UINT32_MAX;
1742    p->maxIndirectCommandsTokenCount = 512;
1743    p->maxIndirectCommandsTokenOffset = UINT16_MAX;
1744    p->minIndirectCommandsBufferOffsetAlignment = 4;
1745    p->minSequencesCountBufferOffsetAlignment = 4;
1746    p->minSequencesIndexBufferOffsetAlignment = 4;
1747    /* Don't support even a shader group count = 1 until we support shader
1748     * overrides during pipeline creation. */
1749    p->maxGraphicsShaderGroupCount = 0;
1750    /* MSB reserved for signalling indirect count enablement. */
1751    p->maxIndirectSequenceCount = UINT32_MAX >> 1;
1752 
1753    /* VK_EXT_graphics_pipeline_library */
1754    p->graphicsPipelineLibraryFastLinking = true;
1755    p->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1756 
1757    /* VK_EXT_mesh_shader */
1758    p->maxTaskWorkGroupTotalCount = 4194304; /* 2^22 min required */
1759    p->maxTaskWorkGroupCount[0] = 65535;
1760    p->maxTaskWorkGroupCount[1] = 65535;
1761    p->maxTaskWorkGroupCount[2] = 65535;
1762    p->maxTaskWorkGroupInvocations = 1024;
1763    p->maxTaskWorkGroupSize[0] = 1024;
1764    p->maxTaskWorkGroupSize[1] = 1024;
1765    p->maxTaskWorkGroupSize[2] = 1024;
1766    p->maxTaskPayloadSize = 16384; /* 16K min required */
1767    p->maxTaskSharedMemorySize = 65536;
1768    p->maxTaskPayloadAndSharedMemorySize = 65536;
1769 
1770    p->maxMeshWorkGroupTotalCount = 4194304; /* 2^22 min required */
1771    p->maxMeshWorkGroupCount[0] = 65535;
1772    p->maxMeshWorkGroupCount[1] = 65535;
1773    p->maxMeshWorkGroupCount[2] = 65535;
1774    p->maxMeshWorkGroupInvocations = 256; /* Max NGG HW limit */
1775    p->maxMeshWorkGroupSize[0] = 256;
1776    p->maxMeshWorkGroupSize[1] = 256;
1777    p->maxMeshWorkGroupSize[2] = 256;
1778    p->maxMeshOutputMemorySize = 32 * 1024;                                                    /* 32K min required */
1779    p->maxMeshSharedMemorySize = 28672;                                                        /* 28K min required */
1780    p->maxMeshPayloadAndSharedMemorySize = p->maxTaskPayloadSize + p->maxMeshSharedMemorySize; /* 28K min required */
1781    p->maxMeshPayloadAndOutputMemorySize = p->maxTaskPayloadSize + p->maxMeshOutputMemorySize; /* 47K min required */
1782    p->maxMeshOutputComponents = 128; /* 32x vec4 min required */
1783    p->maxMeshOutputVertices = 256;
1784    p->maxMeshOutputPrimitives = 256;
1785    p->maxMeshOutputLayers = 8;
1786    p->maxMeshMultiviewViewCount = MAX_VIEWS;
1787    p->meshOutputPerVertexGranularity = 1;
1788    p->meshOutputPerPrimitiveGranularity = 1;
1789 
1790    p->maxPreferredTaskWorkGroupInvocations = 64;
1791    p->maxPreferredMeshWorkGroupInvocations = 128;
1792    p->prefersLocalInvocationVertexOutput = true;
1793    p->prefersLocalInvocationPrimitiveOutput = true;
1794    p->prefersCompactVertexOutput = true;
1795    p->prefersCompactPrimitiveOutput = false;
1796 
1797    /* VK_EXT_extended_dynamic_state3 */
1798    p->dynamicPrimitiveTopologyUnrestricted = false;
1799 
1800    /* VK_EXT_descriptor_buffer */
1801    p->combinedImageSamplerDescriptorSingleArray = true;
1802    p->bufferlessPushDescriptors = true;
1803    p->allowSamplerImageViewPostSubmitCreation = false;
1804    p->descriptorBufferOffsetAlignment = 4;
1805    p->maxDescriptorBufferBindings = MAX_SETS;
1806    p->maxResourceDescriptorBufferBindings = MAX_SETS;
1807    p->maxSamplerDescriptorBufferBindings = MAX_SETS;
1808    p->maxEmbeddedImmutableSamplerBindings = MAX_SETS;
1809    p->maxEmbeddedImmutableSamplers = radv_max_descriptor_set_size();
1810    p->bufferCaptureReplayDescriptorDataSize = 0;
1811    p->imageCaptureReplayDescriptorDataSize = 0;
1812    p->imageViewCaptureReplayDescriptorDataSize = 0;
1813    p->samplerCaptureReplayDescriptorDataSize = 0;
1814    p->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1815    p->samplerDescriptorSize = 16;
1816    p->combinedImageSamplerDescriptorSize = 96;
1817    p->sampledImageDescriptorSize = 64;
1818    p->storageImageDescriptorSize = 32;
1819    p->uniformTexelBufferDescriptorSize = 16;
1820    p->robustUniformTexelBufferDescriptorSize = 16;
1821    p->storageTexelBufferDescriptorSize = 16;
1822    p->robustStorageTexelBufferDescriptorSize = 16;
1823    p->uniformBufferDescriptorSize = 16;
1824    p->robustUniformBufferDescriptorSize = 16;
1825    p->storageBufferDescriptorSize = 16;
1826    p->robustStorageBufferDescriptorSize = 16;
1827    p->inputAttachmentDescriptorSize = 64;
1828    p->accelerationStructureDescriptorSize = 16;
1829    p->maxSamplerDescriptorBufferRange = UINT32_MAX;
1830    p->maxResourceDescriptorBufferRange = UINT32_MAX;
1831    p->samplerDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1832    p->resourceDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1833    p->descriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1834 
1835    /* VK_KHR_fragment_shader_barycentric */
1836    p->triStripVertexOrderIndependentOfProvokingVertex = false;
1837 
1838    /* VK_EXT_pipeline_robustness */
1839    p->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1840    p->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1841    p->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED_EXT;
1842    p->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1843 
1844    /* VK_KHR_maintenance5 */
1845    p->earlyFragmentMultisampleCoverageAfterSampleCounting = false;
1846    p->earlyFragmentSampleMaskTestBeforeSampleCounting = false;
1847    p->depthStencilSwizzleOneSupport = false;
1848    p->polygonModePointSize = true;
1849    p->nonStrictSinglePixelWideLinesUseParallelogram = false;
1850    p->nonStrictWideLinesUseParallelogram = false;
1851 
1852    /* VK_KHR_cooperative_matrix */
1853    p->cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
1854 
1855    /* VK_KHR_maintenance6 */
1856    p->blockTexelViewCompatibleMultipleLayers = true;
1857    p->maxCombinedImageSamplerDescriptorCount = 1;
1858    p->fragmentShadingRateClampCombinerInputs = true;
1859 
1860    /* VK_EXT_shader_object */
1861    radv_device_get_cache_uuid(pdevice, p->shaderBinaryUUID);
1862    p->shaderBinaryVersion = 1;
1863 }
1864 
1865 static VkResult
1866 radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
1867                                 struct radv_physical_device **device_out)
1868 {
1869    VkResult result;
1870    int fd = -1;
1871    int master_fd = -1;
1872 
1873 #ifdef _WIN32
1874    assert(drm_device == NULL);
1875 #else
1876    if (drm_device) {
1877       const char *path = drm_device->nodes[DRM_NODE_RENDER];
1878       drmVersionPtr version;
1879 
1880       fd = open(path, O_RDWR | O_CLOEXEC);
1881       if (fd < 0) {
1882          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
1883       }
1884 
1885       version = drmGetVersion(fd);
1886       if (!version) {
1887          close(fd);
1888 
1889          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1890                           "Could not get the kernel driver version for device %s: %m", path);
1891       }
1892 
1893       if (strcmp(version->name, "amdgpu")) {
1894          drmFreeVersion(version);
1895          close(fd);
1896 
1897          return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1898                           "Device '%s' is not using the AMDGPU kernel driver: %m", path);
1899       }
1900       drmFreeVersion(version);
1901 
1902       if (instance->debug_flags & RADV_DEBUG_STARTUP)
1903          fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
1904    }
1905 #endif
1906 
1907    struct radv_physical_device *device =
1908       vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1909    if (!device) {
1910       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1911       goto fail_fd;
1912    }
1913 
1914    struct vk_physical_device_dispatch_table dispatch_table;
1915    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
1916    vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);
1917 
1918    result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL, NULL, &dispatch_table);
1919    if (result != VK_SUCCESS) {
1920       goto fail_alloc;
1921    }
1922 
1923    device->instance = instance;
1924 
1925 #ifdef _WIN32
1926    device->ws = radv_null_winsys_create();
1927 #else
1928    if (drm_device) {
1929       bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
1930 
1931       device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid);
1932    } else {
1933       device->ws = radv_null_winsys_create();
1934    }
1935 #endif
1936 
1937    if (!device->ws) {
1938       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
1939       goto fail_base;
1940    }
1941 
1942    device->vk.supported_sync_types = device->ws->get_sync_types(device->ws);
1943 
1944 #ifndef _WIN32
1945    if (drm_device && instance->vk.enabled_extensions.KHR_display) {
1946       master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
1947       if (master_fd >= 0) {
1948          uint32_t accel_working = 0;
1949          struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
1950                                            .return_size = sizeof(accel_working),
1951                                            .query = AMDGPU_INFO_ACCEL_WORKING};
1952 
1953          if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
1954              !accel_working) {
1955             close(master_fd);
1956             master_fd = -1;
1957          }
1958       }
1959    }
1960 #endif
1961 
1962    device->master_fd = master_fd;
1963    device->local_fd = fd;
1964    device->ws->query_info(device->ws, &device->rad_info);
1965 
1966    device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
1967 #if !LLVM_AVAILABLE
1968    if (device->use_llvm) {
1969       fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
1970                       "enabled at build time.\n");
1971       abort();
1972    }
1973 #endif
1974 
1975 #if DETECT_OS_ANDROID
1976    device->emulate_etc2 = !radv_device_supports_etc(device);
1977    device->emulate_astc = true;
1978 #else
1979    device->emulate_etc2 = !radv_device_supports_etc(device) && instance->drirc.vk_require_etc2;
1980    device->emulate_astc = instance->drirc.vk_require_astc;
1981 #endif
1982 
1983    snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
1984             radv_get_compiler_string(device));
1985 
1986    const char *marketing_name = device->ws->get_chip_name(device->ws);
1987    snprintf(device->marketing_name, sizeof(device->name), "%s (RADV %s%s)",
1988             marketing_name ? marketing_name : "AMD Unknown", device->rad_info.name, radv_get_compiler_string(device));
1989 
1990    if (!radv_is_conformant(device))
1991       vk_warn_non_conformant_implementation("radv");
1992 
1993    radv_get_driver_uuid(&device->driver_uuid);
1994    radv_get_device_uuid(&device->rad_info, &device->device_uuid);
1995 
1996    device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);
1997 
1998    device->use_fmask = device->rad_info.gfx_level < GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_FMASK);
1999 
2000    device->use_ngg = (device->rad_info.gfx_level >= GFX10 && device->rad_info.family != CHIP_NAVI14 &&
2001                       !(device->instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
2002                      device->rad_info.gfx_level >= GFX11;
2003 
2004    /* TODO: Investigate if NGG culling helps on GFX11. */
2005    device->use_ngg_culling =
2006       device->use_ngg && device->rad_info.max_render_backends > 1 &&
2007       (device->rad_info.gfx_level == GFX10_3 || (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
2008       !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);
2009 
2010    device->use_ngg_streamout = device->rad_info.gfx_level >= GFX11;
2011 
2012    device->emulate_ngg_gs_query_pipeline_stat = device->use_ngg && device->rad_info.gfx_level < GFX11;
2013 
2014    device->mesh_fast_launch_2 =
2015       device->rad_info.gfx_level >= GFX11 && !(device->instance->debug_flags & RADV_DEBUG_NO_GS_FAST_LAUNCH_2);
2016 
2017    device->emulate_mesh_shader_queries = device->rad_info.gfx_level == GFX10_3;
2018 
2019    /* Determine the number of threads per wave for all stages. */
2020    device->cs_wave_size = 64;
2021    device->ps_wave_size = 64;
2022    device->ge_wave_size = 64;
2023    device->rt_wave_size = 64;
2024 
2025    if (device->rad_info.gfx_level >= GFX10) {
2026       if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
2027          device->cs_wave_size = 32;
2028 
2029       /* For pixel shaders, wave64 is recommended. */
2030       if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
2031          device->ps_wave_size = 32;
2032 
2033       if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
2034          device->ge_wave_size = 32;
2035 
2036       /* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence.
2037        * However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
2038        * wave32 VOPD for VALU dependent code.
2039        * (as well as the SALU count becoming more problematic with wave32)
2040        */
2041       if (device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || device->rad_info.gfx_level < GFX11)
2042          device->rt_wave_size = 32;
2043 
2044       if (device->instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || device->instance->drirc.force_rt_wave64)
2045          device->rt_wave_size = 64;
2046    }
2047 
2048    device->max_shared_size = device->rad_info.gfx_level >= GFX7 ? 65536 : 32768;
2049 
2050    radv_physical_device_init_mem_types(device);
2051 
2052    radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);
2053    radv_physical_device_get_features(device, &device->vk.supported_features);
2054 
2055    radv_get_nir_options(device);
2056 
2057 #ifndef _WIN32
2058    if (drm_device) {
2059       struct stat primary_stat = {0}, render_stat = {0};
2060 
2061       device->available_nodes = drm_device->available_nodes;
2062       device->bus_info = *drm_device->businfo.pci;
2063 
2064       if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
2065           stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
2066          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
2067                             drm_device->nodes[DRM_NODE_PRIMARY]);
2068          goto fail_perfcounters;
2069       }
2070       device->primary_devid = primary_stat.st_rdev;
2071 
2072       if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
2073           stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
2074          result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
2075                             drm_device->nodes[DRM_NODE_RENDER]);
2076          goto fail_perfcounters;
2077       }
2078       device->render_devid = render_stat.st_rdev;
2079    }
2080 #endif
2081 
2082    radv_physical_device_init_cache_key(device);
2083 
2084    if (radv_device_get_cache_uuid(device, device->cache_uuid)) {
2085       result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
2086       goto fail_wsi;
2087    }
2088 
2089    /* The gpu id is already embedded in the uuid so we just pass "radv"
2090     * when creating the cache.
2091     */
2092    char buf[VK_UUID_SIZE * 2 + 1];
2093    mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
2094    device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
2095 
2096    radv_get_physical_device_properties(device);
2097 
2098    if ((device->instance->debug_flags & RADV_DEBUG_INFO))
2099       ac_print_gpu_info(&device->rad_info, stdout);
2100 
2101    radv_init_physical_device_decoder(device);
2102 
2103    radv_physical_device_init_queue_table(device);
2104 
2105    /* We don't check the error code, but later check if it is initialized. */
2106    ac_init_perfcounters(&device->rad_info, false, false, &device->ac_perfcounters);
2107 
2108    /* The WSI is structured as a layer on top of the driver, so this has
2109     * to be the last part of initialization (at least until we get other
2110     * semi-layers).
2111     */
2112    result = radv_init_wsi(device);
2113    if (result != VK_SUCCESS) {
2114       vk_error(instance, result);
2115       goto fail_perfcounters;
2116    }
2117 
2118    device->gs_table_depth = ac_get_gs_table_depth(device->rad_info.gfx_level, device->rad_info.family);
2119 
2120    ac_get_hs_info(&device->rad_info, &device->hs);
2121    ac_get_task_info(&device->rad_info, &device->task_info);
2122    radv_get_binning_settings(device, &device->binning_settings);
2123 
2124    *device_out = device;
2125 
2126    return VK_SUCCESS;
2127 
2128 fail_perfcounters:
2129    ac_destroy_perfcounters(&device->ac_perfcounters);
2130    disk_cache_destroy(device->vk.disk_cache);
2131 fail_wsi:
2132    device->ws->destroy(device->ws);
2133 fail_base:
2134    vk_physical_device_finish(&device->vk);
2135 fail_alloc:
2136    vk_free(&instance->vk.alloc, device);
2137 fail_fd:
2138    if (fd != -1)
2139       close(fd);
2140    if (master_fd != -1)
2141       close(master_fd);
2142    return result;
2143 }
2144 
2145 VkResult
create_null_physical_device(struct vk_instance * vk_instance)2146 create_null_physical_device(struct vk_instance *vk_instance)
2147 {
2148    struct radv_instance *instance = container_of(vk_instance, struct radv_instance, vk);
2149    struct radv_physical_device *pdevice;
2150 
2151    VkResult result = radv_physical_device_try_create(instance, NULL, &pdevice);
2152    if (result != VK_SUCCESS)
2153       return result;
2154 
2155    list_addtail(&pdevice->vk.link, &instance->vk.physical_devices.list);
2156    return VK_SUCCESS;
2157 }
2158 
2159 VkResult
create_drm_physical_device(struct vk_instance * vk_instance,struct _drmDevice * device,struct vk_physical_device ** out)2160 create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
2161 {
2162 #ifndef _WIN32
2163    if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI ||
2164        device->deviceinfo.pci->vendor_id != ATI_VENDOR_ID)
2165       return VK_ERROR_INCOMPATIBLE_DRIVER;
2166 
2167    return radv_physical_device_try_create((struct radv_instance *)vk_instance, device,
2168                                           (struct radv_physical_device **)out);
2169 #else
2170    return VK_SUCCESS;
2171 #endif
2172 }
2173 
void
radv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct radv_physical_device *device = container_of(vk_device, struct radv_physical_device, vk);

   /* Release resources roughly in the reverse order they were initialized:
    * WSI first (it is layered on top of the driver), then perf counters,
    * winsys, disk cache, the DRM fds, and finally the base object and the
    * device allocation itself.
    */
   radv_finish_wsi(device);
   ac_destroy_perfcounters(&device->ac_perfcounters);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->vk.disk_cache);
   /* fds are -1 when never opened (e.g. the null device). */
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}
2190 
static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice, uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   /* Report the queue families exposed by this device using the standard
    * Vulkan two-call idiom: when pQueueFamilyProperties is NULL only the
    * family count is written to *pCount; otherwise up to *pCount entries
    * (pQueueFamilyProperties is an array of pointers, one per family) are
    * filled and *pCount is updated to the number actually written.
    *
    * Family order: graphics (always first), then the optional compute,
    * video-decode and transfer families, and finally a sparse-binding-only
    * family. The counting pass below must stay in lockstep with the fill
    * pass further down.
    */
   int num_queue_families = 2; /* graphics + the trailing sparse-binding-only family */
   int idx;
   /* Dedicated compute family, unless disabled or the HW has no compute rings. */
   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   /* Video decode family is opt-in via RADV_PERFTEST. */
   if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
      if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0)
         num_queue_families++;
   }

   if (radv_transfer_queue_enabled(pdevice)) {
      num_queue_families++;
   }

   /* Count-only query. */
   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   /* Graphics family: a single all-purpose queue. */
   if (*pCount >= 1) {
      VkQueueFlags gfx_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (pdevice->instance->drirc.legacy_sparse_binding)
         gfx_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = gfx_flags,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   /* Compute family: same condition as the counting pass above. */
   if (pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      VkQueueFlags compute_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (pdevice->instance->drirc.legacy_sparse_binding)
         compute_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = compute_flags,
            .queueCount = pdevice->rad_info.ip[AMD_IP_COMPUTE].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }

   /* Video decode family: only exposes VK_QUEUE_VIDEO_DECODE_BIT_KHR. */
   if (pdevice->instance->perftest_flags & RADV_PERFTEST_VIDEO_DECODE) {
      if (pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues > 0) {
         if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
               .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
               .queueCount = pdevice->rad_info.ip[pdevice->vid_decode_ip].num_queues,
               .timestampValidBits = 64,
               .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
            };
            idx++;
         }
      }
   }

   /* Transfer (SDMA) family; note the coarser 16x16x8 transfer granularity. */
   if (radv_transfer_queue_enabled(pdevice)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = VK_QUEUE_TRANSFER_BIT,
            .queueCount = pdevice->rad_info.ip[AMD_IP_SDMA].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){16, 16, 8},
         };
         idx++;
      }
   }

   /* Trailing sparse-binding-only family (counted unconditionally above). */
   if (*pCount > idx) {
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   /* Report how many families were actually written. */
   *pCount = idx;
}
2286 
/* Global queue priorities advertised through
 * VkQueueFamilyGlobalPriorityPropertiesKHR for every queue family,
 * listed in ascending priority order.
 */
static const VkQueueGlobalPriorityKHR radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_KHR,
};
2293 
VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                             VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   /* Count-only query: delegate and return. */
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   /* radv exposes at most 5 queue families (see the helper and the assert
    * below), so collect pointers to the core VkQueueFamilyProperties of the
    * first five entries for the helper to fill in place.
    */
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
      &pQueueFamilyProperties[4].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 5);

   /* Fill any extension structs chained off each returned family. */
   for (uint32_t i = 0; i < *pCount; i++) {
      vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
            /* All families report the same fixed set of global priorities. */
            VkQueueFamilyGlobalPriorityPropertiesKHR *prop = (VkQueueFamilyGlobalPriorityPropertiesKHR *)ext;
            STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_KHR);
            prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
            memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
            break;
         }
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
            VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
            prop->queryResultStatusSupport = VK_FALSE;
            break;
         }
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
            /* Only the video-decode family reports codec operations:
             * H.264/H.265 when compiled in, AV1 on VCN >= 3.0 except 3.0.33.
             */
            VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
            prop->videoCodecOperations = 0;
            if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
               if (VIDEO_CODEC_H264DEC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
               if (VIDEO_CODEC_H265DEC)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
               if (VIDEO_CODEC_AV1DEC && pdevice->rad_info.vcn_ip_version >= VCN_3_0_0 &&
                   pdevice->rad_info.vcn_ip_version != VCN_3_0_33)
                  prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
            }
            break;
         }
         default:
            break;
         }
      }
   }
}
2346 
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   /* Fill VK_EXT_memory_budget per-heap budget/usage numbers from the
    * winsys allocation counters and the kernel's system-wide usage queries.
    */
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
   VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;

   /* For all memory heaps, the computation of budget is as follow:
    *	heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (eg.
    * in presence of shared buffers).
    */
   if (!device->rad_info.has_dedicated_vram) {
      /* APU paths: heaps are either unified into one visible-VRAM heap, or
       * split into fake GTT + visible-VRAM heaps whose budgets must be
       * redistributed from the shared system memory.
       */
      if (device->instance->drirc.enable_unified_heap_on_apu) {
         /* When the heaps are unified, only the visible VRAM heap is exposed on APUs. */
         assert(device->heaps == RADV_HEAP_VRAM_VIS);
         assert(device->memory_properties.memoryHeaps[0].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t vram_vis_heap_idx = 0;

         /* Get the total heap size which is the visible VRAM heap size. */
         uint64_t total_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         /* Get the different memory usages. */
         uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
                                       device->ws->query_value(device->ws, RADEON_GTT_USAGE);
         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = total_internal_usage;
      } else {
         /* On APUs, the driver exposes fake heaps to the application because usually the carveout
          * is too small for games but the budgets need to be redistributed accordingly.
          */
         assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
         assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
         assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;

         /* Get the visible VRAM/GTT heap sizes and internal usages. */
         uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
         uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);

         /* Compute the total heap size, internal and system usage. */
         uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
                                       device->ws->query_value(device->ws, RADEON_GTT_USAGE);

         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         /* Compute the remaining visible VRAM size for this process. */
         uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);

         /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap
          * sizes, and align down to the page size to be conservative.
          */
         vram_vis_free_space =
            ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), device->rad_info.gart_page_size);
         uint64_t gtt_free_space = total_free_space - vram_vis_free_space;

         memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
         memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
         memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
      }
   } else {
      /* Dedicated-VRAM GPUs: compute each exposed heap's budget independently,
       * iterating the heap mask in the same order the heaps were exposed.
       */
      unsigned mask = device->heaps;
      unsigned heap = 0;
      while (mask) {
         uint64_t internal_usage = 0, system_usage = 0;
         unsigned type = 1u << u_bit_scan(&mask);

         switch (type) {
         case RADV_HEAP_VRAM:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
            break;
         case RADV_HEAP_VRAM_VIS:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
            /* When no separate VRAM heap exists, fold all VRAM allocations in. */
            if (!(device->heaps & RADV_HEAP_VRAM))
               internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
            break;
         case RADV_HEAP_GTT:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
            system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
            break;
         }

         uint64_t total_usage = MAX2(internal_usage, system_usage);

         uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
                               MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
         memoryBudget->heapBudget[heap] = free_space + internal_usage;
         memoryBudget->heapUsage[heap] = internal_usage;
         ++heap;
      }

      assert(heap == memory_properties->memoryHeapCount);
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2475 
2476 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)2477 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2478                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2479 {
2480    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2481 
2482    pMemoryProperties->memoryProperties = pdevice->memory_properties;
2483 
2484    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2485       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2486    if (memory_budget)
2487       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2488 }
2489 
/* Time domains supported for calibrated timestamps; the raw monotonic clock
 * entry is only advertised on platforms that define CLOCK_MONOTONIC_RAW.
 */
static const VkTimeDomainKHR radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_KHR,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR,
#endif
};
2497 
2498 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice,uint32_t * pTimeDomainCount,VkTimeDomainKHR * pTimeDomains)2499 radv_GetPhysicalDeviceCalibrateableTimeDomainsKHR(VkPhysicalDevice physicalDevice, uint32_t *pTimeDomainCount,
2500                                                   VkTimeDomainKHR *pTimeDomains)
2501 {
2502    int d;
2503    VK_OUTARRAY_MAKE_TYPED(VkTimeDomainKHR, out, pTimeDomains, pTimeDomainCount);
2504 
2505    for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
2506       vk_outarray_append_typed(VkTimeDomainKHR, &out, i)
2507       {
2508          *i = radv_time_domains[d];
2509       }
2510    }
2511 
2512    return vk_outarray_status(&out);
2513 }
2514 
2515 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2516 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
2517                                                VkMultisamplePropertiesEXT *pMultisampleProperties)
2518 {
2519    VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2520 
2521    if (samples & supported_samples) {
2522       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2523    } else {
2524       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
2525    }
2526 }
2527 
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
                                              VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   /* Enumerate the supported fragment shading rates (2x2, 2x1, 1x2, 1x1 in
    * that order, i.e. larger fragment sizes first) via the standard
    * two-call outarray idiom.
    */
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
                          pFragmentShadingRateCount);

/* Append one w x h rate entry supporting sample counts 's'. */
#define append_rate(w, h, s)                                                                                           \
   {                                                                                                                   \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                                                                  \
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,                              \
         .sampleCounts = s,                                                                                            \
         .fragmentSize = {.width = w, .height = h},                                                                    \
      };                                                                                                               \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;                             \
   }

   for (uint32_t x = 2; x >= 1; x--) {
      for (uint32_t y = 2; y >= 1; y--) {
         VkSampleCountFlagBits samples;

         if (x == 1 && y == 1) {
            /* The 1x1 rate advertises all sample counts. */
            samples = ~0;
         } else {
            samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
         }

         append_rate(x, y, samples);
      }
   }
#undef append_rate

   return vk_outarray_status(&out);
}
2562 
/* VK_EXT_tooling_info */
VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, uint32_t *pToolCount,
                                     VkPhysicalDeviceToolProperties *pToolProperties)
{
   /* Report which AMD tracing/profiling tools are active for this instance,
    * based on the enabled trace modes (RGP, RMV, RRA). The counting pass
    * mirrors the reporting pass below.
    */
   VK_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, pToolCount);
   bool rgp_enabled, rmv_enabled, rra_enabled;
   uint32_t tool_count = 0;

   /* RGP */
   rgp_enabled = pdevice->instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
   if (rgp_enabled)
      tool_count++;

   /* RMV */
   rmv_enabled = pdevice->instance->vk.trace_mode & VK_TRACE_MODE_RMV;
   if (rmv_enabled)
      tool_count++;

   /* RRA */
   rra_enabled = pdevice->instance->vk.trace_mode & RADV_TRACE_MODE_RRA;
   if (rra_enabled)
      tool_count++;

   /* Count-only query. */
   if (!pToolProperties) {
      *pToolCount = tool_count;
      return VK_SUCCESS;
   }

   if (rgp_enabled) {
      VkPhysicalDeviceToolProperties tool = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
         .name = "Radeon GPU Profiler",
         .version = "1.15",
         .description = "A ground-breaking low-level optimization tool that provides detailed "
                        "information on Radeon GPUs.",
         .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT |
                     /* VK_EXT_debug_marker is only exposed if SQTT is enabled. */
                     VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT | VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT,
      };
      vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
   }

   if (rmv_enabled) {
      VkPhysicalDeviceToolProperties tool = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
         .name = "Radeon Memory Visualizer",
         .version = "1.6",
         .description = "A tool to allow you to gain a deep understanding of how your application "
                        "uses memory for graphics resources.",
         .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
      };
      vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
   }

   if (rra_enabled) {
      VkPhysicalDeviceToolProperties tool = {
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
         .name = "Radeon Raytracing Analyzer",
         .version = "1.2",
         .description = "A tool to investigate the performance of your ray tracing applications and "
                        "highlight potential bottlenecks.",
         .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
      };
      vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
   }

   return vk_outarray_status(&out);
}
2634 
2635 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)2636 radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
2637                                                      VkCooperativeMatrixPropertiesKHR *pProperties)
2638 {
2639    VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2640 
2641    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2642    {
2643       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2644                                                      .MSize = 16,
2645                                                      .NSize = 16,
2646                                                      .KSize = 16,
2647                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2648                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2649                                                      .CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2650                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2651                                                      .saturatingAccumulation = false,
2652                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2653    }
2654 
2655    vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2656    {
2657       *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2658                                                      .MSize = 16,
2659                                                      .NSize = 16,
2660                                                      .KSize = 16,
2661                                                      .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2662                                                      .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2663                                                      .CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2664                                                      .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2665                                                      .saturatingAccumulation = false,
2666                                                      .scope = VK_SCOPE_SUBGROUP_KHR};
2667    }
2668 
2669    for (unsigned asigned = 0; asigned < 2; asigned++) {
2670       for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
2671          for (unsigned csigned = 0; csigned < 2; csigned++) {
2672             for (unsigned saturate = 0; saturate < 2; saturate++) {
2673                if (!csigned && saturate)
2674                   continue; /* The HW only supports signed acc. */
2675                vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2676                {
2677                   *p = (struct VkCooperativeMatrixPropertiesKHR){
2678                      .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2679                      .MSize = 16,
2680                      .NSize = 16,
2681                      .KSize = 16,
2682                      .AType = asigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2683                      .BType = bsigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2684                      .CType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2685                      .ResultType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2686                      .saturatingAccumulation = saturate,
2687                      .scope = VK_SCOPE_SUBGROUP_KHR};
2688                }
2689             }
2690          }
2691       }
2692    }
2693 
2694    return vk_outarray_status(&out);
2695 }
2696