1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * SPDX-License-Identifier: MIT
9 */
10
11 #include <fcntl.h>
12
13 #ifdef MAJOR_IN_SYSMACROS
14 #include <sys/sysmacros.h>
15 #endif
16
17 #include "vk_log.h"
18 #include "vk_shader_module.h"
19
20 #include "util/disk_cache.h"
21 #include "util/hex.h"
22 #include "util/u_debug.h"
23 #include "radv_android.h"
24 #include "radv_debug.h"
25 #include "radv_entrypoints.h"
26 #include "radv_instance.h"
27 #include "radv_physical_device.h"
28 #include "radv_pipeline_rt.h"
29 #include "radv_video.h"
30 #include "radv_wsi.h"
31
32 #ifdef _WIN32
33 typedef void *drmDevicePtr;
34 #include <io.h>
35 #else
36 #include <amdgpu.h>
37 #include "drm-uapi/amdgpu_drm.h"
38 #include "util/os_drm.h"
39 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
40 #endif
41 #include "winsys/null/radv_null_winsys_public.h"
42 #include "git_sha1.h"
43
44 #if AMD_LLVM_AVAILABLE
45 #include "ac_llvm_util.h"
46 #endif
47
48 #ifdef _WIN32
49 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 0
50 #else
51 #define RADV_SUPPORT_CALIBRATED_TIMESTAMPS 1
52 #endif
53
54 static bool
radv_perf_query_supported(const struct radv_physical_device * pdev)55 radv_perf_query_supported(const struct radv_physical_device *pdev)
56 {
57 const struct radv_instance *instance = radv_physical_device_instance(pdev);
58
59 /* SQTT / SPM interfere with the register states for perf counters, and
60 * the code has only been tested on GFX10.3 */
61 return pdev->info.gfx_level == GFX10_3 && !(instance->vk.trace_mode & RADV_TRACE_MODE_RGP);
62 }
63
64 static bool
radv_taskmesh_enabled(const struct radv_physical_device * pdev)65 radv_taskmesh_enabled(const struct radv_physical_device *pdev)
66 {
67 const struct radv_instance *instance = radv_physical_device_instance(pdev);
68
69 if (instance->debug_flags & RADV_DEBUG_NO_MESH_SHADER)
70 return false;
71
72 return pdev->use_ngg && !pdev->use_llvm && pdev->info.gfx_level >= GFX10_3 &&
73 !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE) && pdev->info.has_gang_submit;
74 }
75
76 static bool
radv_transfer_queue_enabled(const struct radv_physical_device * pdev)77 radv_transfer_queue_enabled(const struct radv_physical_device *pdev)
78 {
79 const struct radv_instance *instance = radv_physical_device_instance(pdev);
80
81 /* Check if the GPU has SDMA support and transfer queues are allowed. */
82 if (pdev->info.sdma_ip_version == SDMA_UNKNOWN || !pdev->info.ip[AMD_IP_SDMA].num_queues ||
83 !(instance->perftest_flags & RADV_PERFTEST_TRANSFER_QUEUE))
84 return false;
85
86 return pdev->info.gfx_level >= GFX9;
87 }
88
89 static bool
radv_vrs_attachment_enabled(const struct radv_physical_device * pdev)90 radv_vrs_attachment_enabled(const struct radv_physical_device *pdev)
91 {
92 const struct radv_instance *instance = radv_physical_device_instance(pdev);
93 return pdev->info.gfx_level >= GFX11 || !(instance->debug_flags & RADV_DEBUG_NO_HIZ);
94 }
95
96 static bool
radv_calibrated_timestamps_enabled(const struct radv_physical_device * pdev)97 radv_calibrated_timestamps_enabled(const struct radv_physical_device *pdev)
98 {
99 return RADV_SUPPORT_CALIBRATED_TIMESTAMPS && !(pdev->info.family == CHIP_RAVEN || pdev->info.family == CHIP_RAVEN2);
100 }
101
102 static bool
radv_filter_minmax_enabled(const struct radv_physical_device * pdev)103 radv_filter_minmax_enabled(const struct radv_physical_device *pdev)
104 {
105 /* Tahiti and Verde only: reduction mode is unsupported due to a bug
106 * (it might work sometimes, but that's not enough)
107 */
108 return !(pdev->info.family == CHIP_TAHITI || pdev->info.family == CHIP_VERDE);
109 }
110
111 static bool
radv_cooperative_matrix_enabled(const struct radv_physical_device * pdev)112 radv_cooperative_matrix_enabled(const struct radv_physical_device *pdev)
113 {
114 if (pdev->info.gfx_level == GFX12)
115 return false; /* TODO */
116
117 return pdev->info.gfx_level >= GFX11 && !pdev->use_llvm;
118 }
119
120 bool
radv_enable_rt(const struct radv_physical_device * pdev)121 radv_enable_rt(const struct radv_physical_device *pdev)
122 {
123 if (!pdev->info.has_image_bvh_intersect_ray && !radv_emulate_rt(pdev))
124 return false;
125
126 if (pdev->use_llvm)
127 return false;
128
129 return true;
130 }
131
132 bool
radv_emulate_rt(const struct radv_physical_device * pdev)133 radv_emulate_rt(const struct radv_physical_device *pdev)
134 {
135 const struct radv_instance *instance = radv_physical_device_instance(pdev);
136 return instance->perftest_flags & RADV_PERFTEST_EMULATE_RT;
137 }
138
139 static VkConformanceVersion
radv_get_conformance_version(const struct radv_physical_device * pdev)140 radv_get_conformance_version(const struct radv_physical_device *pdev)
141 {
142 VkConformanceVersion conformance_version = {0}; /* Non-conformant by default */
143
144 if (pdev->info.gfx_level >= GFX8 && pdev->info.gfx_level <= GFX11_5) {
145 switch (pdev->info.family) {
146 /* GFX8 */
147 case CHIP_TONGA:
148 case CHIP_FIJI:
149 case CHIP_POLARIS10:
150 /* GFX9 */
151 case CHIP_VEGA10:
152 case CHIP_RENOIR:
153 /* GFX10 */
154 case CHIP_NAVI14:
155 /* GFX10.3 */
156 case CHIP_NAVI21:
157 case CHIP_NAVI22:
158 case CHIP_VANGOGH:
159 /* GFX11 */
160 case CHIP_NAVI31:
161 /* GFX11.5 */
162 case CHIP_GFX1150:
163 conformance_version = (VkConformanceVersion){
164 .major = 1,
165 .minor = 4,
166 .subminor = 0,
167 .patch = 0,
168 };
169 break;
170 default:
171 break;
172 }
173 } else {
174 /* GFX6-7 */
175 switch (pdev->info.family) {
176 case CHIP_TAHITI:
177 case CHIP_PITCAIRN:
178 case CHIP_VERDE:
179 case CHIP_OLAND:
180 case CHIP_BONAIRE:
181 case CHIP_HAWAII:
182 conformance_version = (VkConformanceVersion){
183 .major = 1,
184 .minor = 3,
185 .subminor = 9,
186 .patch = 2,
187 };
188 break;
189 default:
190 break;
191 }
192 }
193
194 return conformance_version;
195 }
196
/* Decode 2*length hex characters from "in" into length raw bytes in "out".
 * Both lowercase and uppercase hex digits are accepted; the input is assumed
 * to be valid hex of at least 2*length characters (no validation is done). */
static void
parse_hex(char *out, const char *in, unsigned length)
{
   memset(out, 0, length);

   for (unsigned i = 0; i < 2 * length; ++i) {
      const char c = in[i];
      unsigned v;

      if (c >= '0' && c <= '9')
         v = c - '0';
      else if (c >= 'a' && c <= 'f')
         v = c - 'a' + 10;
      else
         v = c - 'A' + 10;

      /* Even positions hold the high nibble of each output byte. */
      out[i / 2] |= v << (4 * (1 - i % 2));
   }
}
208
/* Fill pdev->cache_key with every device/instance setting folded into the
 * cache UUID (see radv_device_get_cache_uuid), so caches are invalidated
 * whenever one of these settings changes. */
static void
radv_physical_device_init_cache_key(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   struct radv_physical_device_cache_key *key = &pdev->cache_key;

   /* Hardware identity and build properties. */
   key->family = pdev->info.family;
   key->ptr_size = sizeof(void *);
   key->conformant_trunc_coord = pdev->info.conformant_trunc_coord;

   /* drirc workarounds, debug/perftest flags and per-stage wave sizes. */
   key->clear_lds = instance->drirc.clear_lds;
   key->cs_wave32 = pdev->cs_wave_size == 32;
   key->disable_aniso_single_level = instance->drirc.disable_aniso_single_level && pdev->info.gfx_level < GFX8;
   key->disable_shrink_image_store = instance->drirc.disable_shrink_image_store;
   key->disable_sinking_load_input_fs = instance->drirc.disable_sinking_load_input_fs;
   key->emulate_rt = !!(instance->perftest_flags & RADV_PERFTEST_EMULATE_RT);
   key->ge_wave32 = pdev->ge_wave_size == 32;
   key->invariant_geom = !!(instance->debug_flags & RADV_DEBUG_INVARIANT_GEOM);
   key->no_fmask = !!(instance->debug_flags & RADV_DEBUG_NO_FMASK);
   key->no_ngg_gs = !!(instance->debug_flags & RADV_DEBUG_NO_NGG_GS);
   key->no_rt = !!(instance->debug_flags & RADV_DEBUG_NO_RT);
   key->ps_wave32 = pdev->ps_wave_size == 32;
   key->rt_wave64 = pdev->rt_wave_size == 64;
   key->split_fma = !!(instance->debug_flags & RADV_DEBUG_SPLIT_FMA);
   key->ssbo_non_uniform = instance->drirc.ssbo_non_uniform;
   key->tex_non_uniform = instance->drirc.tex_non_uniform;
   key->lower_terminate_to_discard = instance->drirc.lower_terminate_to_discard;
   key->use_llvm = pdev->use_llvm;
   key->use_ngg = pdev->use_ngg;
   key->use_ngg_culling = pdev->use_ngg_culling;
}
240
/* Compute the pipeline/disk cache UUID for this device by hashing the build
 * identifier (or an explicit hex override), the LLVM library identity when
 * the LLVM backend is in use, and the per-device cache key.
 * Returns 0 on success, -1 if a build/function identifier was unavailable. */
static int
radv_device_get_cache_uuid(struct radv_physical_device *pdev, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

#ifdef RADV_BUILD_ID_OVERRIDE
   {
      /* Hash the decoded override string instead of the real build id. */
      unsigned size = strlen(RADV_BUILD_ID_OVERRIDE) / 2;
      char *data = alloca(size);
      parse_hex(data, RADV_BUILD_ID_OVERRIDE, size);
      _mesa_sha1_update(&ctx, data, size);
   }
#else
   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx))
      return -1;
#endif

#if AMD_LLVM_AVAILABLE
   /* Include LLVM's identity so switching LLVM versions invalidates caches. */
   if (pdev->use_llvm && !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;
#endif

   _mesa_sha1_update(&ctx, &pdev->cache_key, sizeof(pdev->cache_key));
   _mesa_sha1_final(&ctx, sha1);

   /* Truncate the 20-byte SHA-1 digest to VK_UUID_SIZE bytes. */
   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}
273
/* Fill a VK_UUID_SIZE driver UUID via the shared amd/common helper. */
static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}
279
/* Fill a VK_UUID_SIZE device UUID derived from the GPU info. */
static void
radv_get_device_uuid(const struct radeon_info *gpu_info, void *uuid)
{
   ac_compute_device_uuid(gpu_info, uuid, VK_UUID_SIZE);
}
285
/* Build the mapping from Vulkan queue family index to RADV queue type.
 * The order of the blocks below defines the queue family indices exposed to
 * the application: GENERAL first, then (when enabled) COMPUTE, VIDEO_DEC,
 * TRANSFER, VIDEO_ENC and SPARSE — do not reorder. */
static void
radv_physical_device_init_queue_table(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   int idx = 0;
   pdev->vk_queue_to_radv[idx] = RADV_QUEUE_GENERAL;
   idx++;

   /* Mark all remaining slots invalid with an out-of-range value. */
   for (unsigned i = 1; i < RADV_MAX_QUEUE_FAMILIES; i++)
      pdev->vk_queue_to_radv[i] = RADV_MAX_QUEUE_FAMILIES + 1;

   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_COMPUTE;
      idx++;
   }

   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
         pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_DEC;
         idx++;
      }
   }

   if (radv_transfer_queue_enabled(pdev)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_TRANSFER;
      idx++;
   }

   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
         pdev->vk_queue_to_radv[idx] = RADV_QUEUE_VIDEO_ENC;
         idx++;
      }
   }

   if (radv_sparse_queue_enabled(pdev)) {
      pdev->vk_queue_to_radv[idx] = RADV_QUEUE_SPARSE;
      idx++;
   }

   pdev->num_queues = idx;
}
328
/* Bitmask of the memory heaps a physical device may expose
 * (see radv_physical_device_init_mem_types). */
enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,     /* device-local VRAM that is not CPU-visible */
   RADV_HEAP_GTT = 1 << 1,      /* system memory (GART) */
   RADV_HEAP_VRAM_VIS = 1 << 2, /* CPU-visible VRAM */
   RADV_HEAP_MAX = 1 << 3,
};
335
336 static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device * pdev)337 radv_get_adjusted_vram_size(struct radv_physical_device *pdev)
338 {
339 const struct radv_instance *instance = radv_physical_device_instance(pdev);
340 int ov = instance->drirc.override_vram_size;
341 if (ov >= 0)
342 return MIN2((uint64_t)pdev->info.vram_size_kb * 1024, (uint64_t)ov << 20);
343 return (uint64_t)pdev->info.vram_size_kb * 1024;
344 }
345
346 static uint64_t
radv_get_visible_vram_size(struct radv_physical_device * pdev)347 radv_get_visible_vram_size(struct radv_physical_device *pdev)
348 {
349 return MIN2(radv_get_adjusted_vram_size(pdev), (uint64_t)pdev->info.vram_vis_size_kb * 1024);
350 }
351
352 static uint64_t
radv_get_vram_size(struct radv_physical_device * pdev)353 radv_get_vram_size(struct radv_physical_device *pdev)
354 {
355 uint64_t total_size = radv_get_adjusted_vram_size(pdev);
356 return total_size - MIN2(total_size, (uint64_t)pdev->info.vram_vis_size_kb * 1024);
357 }
358
/* Populate the VkPhysicalDeviceMemoryProperties heaps/types and the parallel
 * radv arrays (memory_domains/memory_flags) used at allocation time.
 * The relative order of the memory types below is observable by applications,
 * so be careful when changing it. */
static void
radv_physical_device_init_mem_types(struct radv_physical_device *pdev)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   uint64_t visible_vram_size = radv_get_visible_vram_size(pdev);
   uint64_t vram_size = radv_get_vram_size(pdev);
   uint64_t gtt_size = (uint64_t)pdev->info.gart_size_kb * 1024;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   pdev->memory_properties.memoryHeapCount = 0;
   pdev->heaps = 0;

   /* APUs have no dedicated VRAM: repartition GTT + visible VRAM. */
   if (!pdev->info.has_dedicated_vram) {
      const uint64_t total_size = gtt_size + visible_vram_size;

      if (instance->drirc.enable_unified_heap_on_apu) {
         /* Some applications seem better when the driver exposes only one heap of VRAM on APUs. */
         visible_vram_size = total_size;
         gtt_size = 0;
      } else {
         /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
          * greater than it. To workaround this, we compute the total available memory size (GTT +
          * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
          */
         visible_vram_size = align64((total_size * 2) / 3, pdev->info.gart_page_size);
         gtt_size = total_size - visible_vram_size;
      }

      vram_size = 0;
   }

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM;
      pdev->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_GTT;
      pdev->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = pdev->memory_properties.memoryHeapCount++;
      pdev->heaps |= RADV_HEAP_VRAM_VIS;
      pdev->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   /* Device-local types (plus 32-bit-addressable variants) first. */
   if (vram_index >= 0 || visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   /* Write-combined host-visible GTT. */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   /* CPU-visible VRAM (plus 32-bit variant). */
   if (visible_vram_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   /* Cached host-visible GTT (plus 32-bit variant). */
   if (gart_index >= 0) {
      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };

      pdev->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      pdev->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_32BIT;
      pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   pdev->memory_properties.memoryTypeCount = type_count;

   /* Duplicate eligible types as device-coherent/uncached (AMD extension),
    * skipping 32-bit types. */
   if (pdev->info.has_l2_uncached) {
      for (int i = 0; i < pdev->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = pdev->memory_properties.memoryTypes[i];

         if (((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
              mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) &&
             !(pdev->memory_flags[i] & RADEON_FLAG_32BIT)) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags | VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            pdev->memory_domains[type_count] = pdev->memory_domains[i];
            pdev->memory_flags[type_count] = pdev->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            pdev->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      pdev->memory_properties.memoryTypeCount = type_count;
   }

   /* Record which types use 32-bit addressing for fast lookup. */
   for (unsigned i = 0; i < type_count; ++i) {
      if (pdev->memory_flags[i] & RADEON_FLAG_32BIT)
         pdev->memory_types_32bit |= BITFIELD_BIT(i);
   }
}
509
510 uint32_t
radv_find_memory_index(const struct radv_physical_device * pdev,VkMemoryPropertyFlags flags)511 radv_find_memory_index(const struct radv_physical_device *pdev, VkMemoryPropertyFlags flags)
512 {
513 const VkPhysicalDeviceMemoryProperties *mem_properties = &pdev->memory_properties;
514 for (uint32_t i = 0; i < mem_properties->memoryTypeCount; ++i) {
515 if (mem_properties->memoryTypes[i].propertyFlags == flags) {
516 return i;
517 }
518 }
519 unreachable("invalid memory properties");
520 }
521
522 static void
radv_get_binning_settings(const struct radv_physical_device * pdev,struct radv_binning_settings * settings)523 radv_get_binning_settings(const struct radv_physical_device *pdev, struct radv_binning_settings *settings)
524 {
525 if ((pdev->info.has_dedicated_vram && pdev->info.max_render_backends > 4) || pdev->info.gfx_level >= GFX10) {
526 /* Using higher settings on GFX10+ can cause random GPU hangs. */
527 settings->context_states_per_bin = 1;
528 settings->persistent_states_per_bin = 1;
529 } else {
530 settings->context_states_per_bin = pdev->info.has_gfx9_scissor_bug ? 1 : 3;
531 settings->persistent_states_per_bin = 1;
532 }
533
534 settings->fpovs_per_batch = 63;
535 }
536
/* Fill *out_ext with every device extension RADV can expose on this physical
 * device. Entries are gated on HW generation, the shader backend (LLVM vs
 * ACO), debug/perftest flags, drirc options and build-time platform support. */
static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *pdev,
                                              struct vk_device_extension_table *out_ext)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   const struct vk_device_extension_table ext = {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = radv_enable_rt(pdev),
      .KHR_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .KHR_compute_shader_derivatives = true,
      .KHR_cooperative_matrix = radv_cooperative_matrix_enabled(pdev),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_clamp_zero_one = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shader_barycentric = pdev->info.gfx_level >= GFX10_3,
      .KHR_fragment_shading_rate = pdev->info.gfx_level >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_maintenance7 = true,
      .KHR_maintenance8 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = true,
      .KHR_performance_query = radv_perf_query_supported(pdev),
      .KHR_pipeline_binary = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = !pdev->use_llvm,
      /* Hide these behind dri configs for now since we cannot implement it reliably on
       * all surfaces yet. There is no surface capability query for present wait/id,
       * but the feature is useful enough to hide behind an opt-in mechanism for now.
       * If the instance only enables surface extensions that unconditionally support present wait,
       * we can also expose the extension that way. */
      .KHR_present_id =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_present_wait =
         instance->drirc.enable_khr_present_wait || wsi_common_vk_instance_supports_present_wait(&instance->vk),
      .KHR_push_descriptor = true,
      .KHR_ray_query = radv_enable_rt(pdev),
      .KHR_ray_tracing_maintenance1 = radv_enable_rt(pdev),
      .KHR_ray_tracing_pipeline = radv_enable_rt(pdev),
      .KHR_ray_tracing_position_fetch = radv_enable_rt(pdev),
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_maximal_reconvergence = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_quad_control = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      /* Video extensions depend on decode/encode being enabled and, for AV1,
       * on a VCN 3.0+ IP that is not the VCN_3_0_33 variant. */
      .KHR_video_maintenance1 = pdev->video_decode_enabled || pdev->video_encode_enabled,
      .KHR_video_queue = pdev->video_decode_enabled || pdev->video_encode_enabled,
      .KHR_video_decode_av1 = (pdev->info.vcn_ip_version >= VCN_3_0_0 && pdev->info.vcn_ip_version != VCN_3_0_33 &&
                               VIDEO_CODEC_AV1DEC && pdev->video_decode_enabled),
      .KHR_video_decode_queue = pdev->video_decode_enabled,
      .KHR_video_decode_h264 = VIDEO_CODEC_H264DEC && pdev->video_decode_enabled,
      .KHR_video_decode_h265 = VIDEO_CODEC_H265DEC && pdev->video_decode_enabled,
      .KHR_video_encode_h264 = VIDEO_CODEC_H264ENC && pdev->video_encode_enabled,
      .KHR_video_encode_h265 = VIDEO_CODEC_H265ENC && pdev->video_encode_enabled,
      .KHR_video_encode_queue = pdev->video_encode_enabled,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = pdev->info.gfx_level >= GFX10,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = radv_calibrated_timestamps_enabled(pdev),
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = pdev->info.gfx_level >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = instance->vk.trace_mode & RADV_TRACE_MODE_RGP,
      .EXT_depth_bias_control = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clamp_control = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
      .EXT_device_fault = pdev->info.has_gpuvm_fault_query,
      .EXT_device_generated_commands = pdev->info.gfx_level >= GFX8,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_dynamic_rendering_unused_attachments = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_acquire_unmodified = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = pdev->info.has_userptr,
      .EXT_fragment_shader_interlock = radv_has_pops(pdev),
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_GPL),
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_compression_control = true,
      .EXT_image_drm_format_modifier = pdev->info.gfx_level >= GFX9,
      .EXT_image_robustness = true,
      .EXT_image_sliced_view_of_3d = pdev->info.gfx_level >= GFX10,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = pdev->info.gfx_level >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_vertex_attributes = !pdev->use_llvm,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_mesh_shader = radv_taskmesh_enabled(pdev),
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true, /* Trivial promotion from VALVE. */
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_library_group_handles = radv_enable_rt(pdev),
      .EXT_pipeline_robustness = !pdev->use_llvm,
      .EXT_post_depth_coverage = pdev->info.gfx_level >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = pdev->info.gfx_level < GFX10,
      .EXT_sampler_filter_minmax = radv_filter_minmax_enabled(pdev),
      .EXT_scalar_block_layout = pdev->info.gfx_level >= GFX7,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_atomic_float = true,
      .EXT_shader_atomic_float2 = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_object = !pdev->use_llvm && !(instance->debug_flags & RADV_DEBUG_NO_ESO),
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
#ifdef RADV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !pdev->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = pdev->info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = pdev->info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = pdev->info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_early_and_late_fragment_tests = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = pdev->use_fmask,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = pdev->info.gfx_level < GFX11,
#if DETECT_OS_ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .INTEL_shader_integer_functions2 = true,
      .MESA_image_alignment_control = pdev->info.gfx_level >= GFX9,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
   *out_ext = ext;
}
783
784 static void
radv_physical_device_get_features(const struct radv_physical_device * pdev,struct vk_features * features)785 radv_physical_device_get_features(const struct radv_physical_device *pdev, struct vk_features *features)
786 {
787 const struct radv_instance *instance = radv_physical_device_instance(pdev);
788 bool taskmesh_en = radv_taskmesh_enabled(pdev);
789 bool has_perf_query = radv_perf_query_supported(pdev);
790 bool has_shader_image_float_minmax = pdev->info.gfx_level != GFX8 && pdev->info.gfx_level != GFX9 &&
791 pdev->info.gfx_level != GFX11 && pdev->info.gfx_level != GFX11_5;
792 bool has_fragment_shader_interlock = radv_has_pops(pdev);
793
794 *features = (struct vk_features){
795 /* Vulkan 1.0 */
796 .robustBufferAccess = true,
797 .fullDrawIndexUint32 = true,
798 .imageCubeArray = true,
799 .independentBlend = true,
800 .geometryShader = true,
801 .tessellationShader = true,
802 .sampleRateShading = true,
803 .dualSrcBlend = true,
804 .logicOp = true,
805 .multiDrawIndirect = true,
806 .drawIndirectFirstInstance = true,
807 .depthClamp = true,
808 .depthBiasClamp = true,
809 .fillModeNonSolid = true,
810 .depthBounds = true,
811 .wideLines = true,
812 .largePoints = true,
813 .alphaToOne = true,
814 .multiViewport = true,
815 .samplerAnisotropy = true,
816 .textureCompressionETC2 = pdev->info.has_etc_support || pdev->emulate_etc2,
817 .textureCompressionASTC_LDR = pdev->emulate_astc,
818 .textureCompressionBC = true,
819 .occlusionQueryPrecise = true,
820 .pipelineStatisticsQuery = true,
821 .vertexPipelineStoresAndAtomics = true,
822 .fragmentStoresAndAtomics = true,
823 .shaderTessellationAndGeometryPointSize = true,
824 .shaderImageGatherExtended = true,
825 .shaderStorageImageExtendedFormats = true,
826 .shaderStorageImageMultisample = true,
827 .shaderUniformBufferArrayDynamicIndexing = true,
828 .shaderSampledImageArrayDynamicIndexing = true,
829 .shaderStorageBufferArrayDynamicIndexing = true,
830 .shaderStorageImageArrayDynamicIndexing = true,
831 .shaderStorageImageReadWithoutFormat = true,
832 .shaderStorageImageWriteWithoutFormat = true,
833 .shaderClipDistance = true,
834 .shaderCullDistance = true,
835 .shaderFloat64 = true,
836 .shaderInt64 = true,
837 .shaderInt16 = true,
838 .sparseBinding = true,
839 .sparseResidencyBuffer = pdev->info.family >= CHIP_POLARIS10,
840 .sparseResidencyImage2D = pdev->info.family >= CHIP_POLARIS10,
841 .sparseResidencyImage3D = pdev->info.family >= CHIP_POLARIS10,
842 .sparseResidencyAliased = pdev->info.family >= CHIP_POLARIS10,
843 .variableMultisampleRate = true,
844 .shaderResourceMinLod = true,
845 .shaderResourceResidency = true,
846 .inheritedQueries = true,
847
848 /* Vulkan 1.1 */
849 .storageBuffer16BitAccess = true,
850 .uniformAndStorageBuffer16BitAccess = true,
851 .storagePushConstant16 = true,
852 .storageInputOutput16 = pdev->info.has_packed_math_16bit,
853 .multiview = true,
854 .multiviewGeometryShader = true,
855 .multiviewTessellationShader = true,
856 .variablePointersStorageBuffer = true,
857 .variablePointers = true,
858 .protectedMemory = false,
859 .samplerYcbcrConversion = true,
860 .shaderDrawParameters = true,
861
862 /* Vulkan 1.2 */
863 .samplerMirrorClampToEdge = true,
864 .drawIndirectCount = true,
865 .storageBuffer8BitAccess = true,
866 .uniformAndStorageBuffer8BitAccess = true,
867 .storagePushConstant8 = true,
868 .shaderBufferInt64Atomics = true,
869 .shaderSharedInt64Atomics = true,
870 .shaderFloat16 = pdev->info.has_packed_math_16bit,
871 .shaderInt8 = true,
872
873 .descriptorIndexing = true,
874 .shaderInputAttachmentArrayDynamicIndexing = true,
875 .shaderUniformTexelBufferArrayDynamicIndexing = true,
876 .shaderStorageTexelBufferArrayDynamicIndexing = true,
877 .shaderUniformBufferArrayNonUniformIndexing = true,
878 .shaderSampledImageArrayNonUniformIndexing = true,
879 .shaderStorageBufferArrayNonUniformIndexing = true,
880 .shaderStorageImageArrayNonUniformIndexing = true,
881 .shaderInputAttachmentArrayNonUniformIndexing = true,
882 .shaderUniformTexelBufferArrayNonUniformIndexing = true,
883 .shaderStorageTexelBufferArrayNonUniformIndexing = true,
884 .descriptorBindingUniformBufferUpdateAfterBind = true,
885 .descriptorBindingSampledImageUpdateAfterBind = true,
886 .descriptorBindingStorageImageUpdateAfterBind = true,
887 .descriptorBindingStorageBufferUpdateAfterBind = true,
888 .descriptorBindingUniformTexelBufferUpdateAfterBind = true,
889 .descriptorBindingStorageTexelBufferUpdateAfterBind = true,
890 .descriptorBindingUpdateUnusedWhilePending = true,
891 .descriptorBindingPartiallyBound = true,
892 .descriptorBindingVariableDescriptorCount = true,
893 .runtimeDescriptorArray = true,
894
895 .samplerFilterMinmax = true,
896 .scalarBlockLayout = pdev->info.gfx_level >= GFX7,
897 .imagelessFramebuffer = true,
898 .uniformBufferStandardLayout = true,
899 .shaderSubgroupExtendedTypes = true,
900 .separateDepthStencilLayouts = true,
901 .hostQueryReset = true,
902 .timelineSemaphore = true,
903 .bufferDeviceAddress = true,
904 .bufferDeviceAddressCaptureReplay = true,
905 .bufferDeviceAddressMultiDevice = false,
906 .vulkanMemoryModel = true,
907 .vulkanMemoryModelDeviceScope = true,
908 .vulkanMemoryModelAvailabilityVisibilityChains = false,
909 .shaderOutputViewportIndex = true,
910 .shaderOutputLayer = true,
911 .subgroupBroadcastDynamicId = true,
912
913 /* Vulkan 1.3 */
914 .robustImageAccess = true,
915 .inlineUniformBlock = true,
916 .descriptorBindingInlineUniformBlockUpdateAfterBind = true,
917 .pipelineCreationCacheControl = true,
918 .privateData = true,
919 .shaderDemoteToHelperInvocation = true,
920 .shaderTerminateInvocation = true,
921 .subgroupSizeControl = true,
922 .computeFullSubgroups = true,
923 .synchronization2 = true,
924 .textureCompressionASTC_HDR = false,
925 .shaderZeroInitializeWorkgroupMemory = true,
926 .dynamicRendering = true,
927 .shaderIntegerDotProduct = true,
928 .maintenance4 = true,
929
930 /* Vulkan 1.4 */
931 .globalPriorityQuery = true,
932 .shaderSubgroupRotate = true,
933 .shaderSubgroupRotateClustered = true,
934 .shaderFloatControls2 = true,
935 .shaderExpectAssume = true,
936 .rectangularLines = true,
937 .bresenhamLines = true,
938 .smoothLines = true,
939 .stippledRectangularLines = false,
940 .stippledBresenhamLines = true,
941 .stippledSmoothLines = false,
942 .vertexAttributeInstanceRateDivisor = true,
943 .vertexAttributeInstanceRateZeroDivisor = true,
944 .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
945 .dynamicRenderingLocalRead = true,
946 .maintenance5 = true,
947 .maintenance6 = true,
948 .pipelineProtectedAccess = false,
949 .pipelineRobustness = true,
950 .hostImageCopy = false,
951 .pushDescriptor = true,
952
953 /* VK_EXT_conditional_rendering */
954 .conditionalRendering = true,
955 .inheritedConditionalRendering = false,
956
957 /* VK_KHR_vertex_attribute_divisor */
958 .vertexAttributeInstanceRateDivisor = true,
959 .vertexAttributeInstanceRateZeroDivisor = true,
960
961 /* VK_EXT_transform_feedback */
962 .transformFeedback = true,
963 .geometryStreams = true,
964
965 /* VK_EXT_memory_priority */
966 .memoryPriority = true,
967
968 /* VK_EXT_depth_clip_enable */
969 .depthClipEnable = true,
970
971 /* VK_KHR_compute_shader_derivatives */
972 .computeDerivativeGroupQuads = false,
973 .computeDerivativeGroupLinear = true,
974
975 /* VK_EXT_ycbcr_image_arrays */
976 .ycbcrImageArrays = true,
977
978 /* VK_KHR_index_type_uint8 */
979 .indexTypeUint8 = pdev->info.gfx_level >= GFX8,
980
981 /* VK_KHR_pipeline_executable_properties */
982 .pipelineExecutableInfo = true,
983
984 /* VK_KHR_shader_clock */
985 .shaderSubgroupClock = true,
986 .shaderDeviceClock = pdev->info.gfx_level >= GFX8,
987
988 /* VK_EXT_texel_buffer_alignment */
989 .texelBufferAlignment = true,
990
991 /* VK_AMD_device_coherent_memory */
992 .deviceCoherentMemory = pdev->info.has_l2_uncached,
993
994 /* VK_KHR_line_rasterization */
995 .rectangularLines = true,
996 .bresenhamLines = true,
997 .smoothLines = true,
998 .stippledRectangularLines = false,
999 .stippledBresenhamLines = true,
1000 .stippledSmoothLines = false,
1001
1002 /* VK_EXT_robustness2 */
1003 .robustBufferAccess2 = true,
1004 .robustImageAccess2 = true,
1005 .nullDescriptor = true,
1006
1007 /* VK_EXT_custom_border_color */
1008 .customBorderColors = true,
1009 .customBorderColorWithoutFormat = true,
1010
1011 /* VK_EXT_extended_dynamic_state */
1012 .extendedDynamicState = true,
1013
1014 /* VK_EXT_shader_atomic_float */
1015 .shaderBufferFloat32Atomics = true,
1016 .shaderBufferFloat32AtomicAdd = pdev->info.gfx_level >= GFX11,
1017 .shaderBufferFloat64Atomics = true,
1018 .shaderBufferFloat64AtomicAdd = false,
1019 .shaderSharedFloat32Atomics = true,
1020 .shaderSharedFloat32AtomicAdd = pdev->info.gfx_level >= GFX8,
1021 .shaderSharedFloat64Atomics = true,
1022 .shaderSharedFloat64AtomicAdd = false,
1023 .shaderImageFloat32Atomics = true,
1024 .shaderImageFloat32AtomicAdd = false,
1025 .sparseImageFloat32Atomics = true,
1026 .sparseImageFloat32AtomicAdd = false,
1027
1028 /* VK_EXT_4444_formats */
1029 .formatA4R4G4B4 = true,
1030 .formatA4B4G4R4 = true,
1031
1032 /* VK_EXT_shader_image_atomic_int64 */
1033 .shaderImageInt64Atomics = true,
1034 .sparseImageInt64Atomics = true,
1035
1036 /* VK_EXT_mutable_descriptor_type */
1037 .mutableDescriptorType = true,
1038
1039 /* VK_KHR_fragment_shading_rate */
1040 .pipelineFragmentShadingRate = true,
1041 .primitiveFragmentShadingRate = true,
1042 .attachmentFragmentShadingRate = radv_vrs_attachment_enabled(pdev),
1043
1044 /* VK_KHR_workgroup_memory_explicit_layout */
1045 .workgroupMemoryExplicitLayout = true,
1046 .workgroupMemoryExplicitLayoutScalarBlockLayout = true,
1047 .workgroupMemoryExplicitLayout8BitAccess = true,
1048 .workgroupMemoryExplicitLayout16BitAccess = true,
1049
1050 /* VK_EXT_provoking_vertex */
1051 .provokingVertexLast = true,
1052 .transformFeedbackPreservesProvokingVertex = true,
1053
1054 /* VK_EXT_extended_dynamic_state2 */
1055 .extendedDynamicState2 = true,
1056 .extendedDynamicState2LogicOp = true,
1057 .extendedDynamicState2PatchControlPoints = true,
1058
1059 /* VK_EXT_global_priority_query */
1060 .globalPriorityQuery = true,
1061
1062 /* VK_KHR_acceleration_structure */
1063 .accelerationStructure = true,
1064 .accelerationStructureCaptureReplay = true,
1065 .accelerationStructureIndirectBuild = false,
1066 .accelerationStructureHostCommands = false,
1067 .descriptorBindingAccelerationStructureUpdateAfterBind = true,
1068
1069 /* VK_EXT_buffer_device_address */
1070 .bufferDeviceAddressCaptureReplayEXT = false,
1071
1072 /* VK_KHR_shader_subgroup_uniform_control_flow */
1073 .shaderSubgroupUniformControlFlow = true,
1074
1075 /* VK_EXT_map_memory_placed */
1076 .memoryMapPlaced = true,
1077 .memoryMapRangePlaced = false,
1078 .memoryUnmapReserve = true,
1079
1080 /* VK_EXT_multi_draw */
1081 .multiDraw = true,
1082
1083 /* VK_EXT_color_write_enable */
1084 .colorWriteEnable = true,
1085
1086 /* VK_EXT_shader_atomic_float2 */
1087 .shaderBufferFloat16Atomics = false,
1088 .shaderBufferFloat16AtomicAdd = false,
1089 .shaderBufferFloat16AtomicMinMax = false,
1090 .shaderBufferFloat32AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 32),
1091 .shaderBufferFloat64AtomicMinMax = radv_has_shader_buffer_float_minmax(pdev, 64),
1092 .shaderSharedFloat16Atomics = false,
1093 .shaderSharedFloat16AtomicAdd = false,
1094 .shaderSharedFloat16AtomicMinMax = false,
1095 .shaderSharedFloat32AtomicMinMax = true,
1096 .shaderSharedFloat64AtomicMinMax = true,
1097 .shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1098 .sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax,
1099
1100 /* VK_KHR_present_id */
1101 .presentId = pdev->vk.supported_extensions.KHR_present_id,
1102
1103 /* VK_KHR_present_wait */
1104 .presentWait = pdev->vk.supported_extensions.KHR_present_wait,
1105
1106 /* VK_EXT_primitive_topology_list_restart */
1107 .primitiveTopologyListRestart = true,
1108 .primitiveTopologyPatchListRestart = false,
1109
1110 /* VK_KHR_ray_query */
1111 .rayQuery = true,
1112
1113 /* VK_EXT_pipeline_library_group_handles */
1114 .pipelineLibraryGroupHandles = true,
1115
1116 /* VK_KHR_ray_tracing_pipeline */
1117 .rayTracingPipeline = true,
1118 .rayTracingPipelineShaderGroupHandleCaptureReplay = true,
1119 .rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false,
1120 .rayTracingPipelineTraceRaysIndirect = true,
1121 .rayTraversalPrimitiveCulling = true,
1122
1123 /* VK_KHR_ray_tracing_maintenance1 */
1124 .rayTracingMaintenance1 = true,
1125 .rayTracingPipelineTraceRaysIndirect2 = radv_enable_rt(pdev),
1126
1127 /* VK_KHR_ray_tracing_position_fetch */
1128 .rayTracingPositionFetch = true,
1129
1130 /* VK_EXT_vertex_input_dynamic_state */
1131 .vertexInputDynamicState = true,
1132
1133 /* VK_EXT_image_view_min_lod */
1134 .minLod = true,
1135
1136 /* VK_EXT_mesh_shader */
1137 .meshShader = taskmesh_en,
1138 .taskShader = taskmesh_en,
1139 .multiviewMeshShader = taskmesh_en,
1140 .primitiveFragmentShadingRateMeshShader = taskmesh_en,
1141 .meshShaderQueries = false,
1142
1143 /* VK_EXT_depth_clip_control */
1144 .depthClipControl = true,
1145
1146 /* VK_EXT_image_2d_view_of_3d */
1147 .image2DViewOf3D = true,
1148 .sampler2DViewOf3D = false,
1149
1150 /* VK_INTEL_shader_integer_functions2 */
1151 .shaderIntegerFunctions2 = true,
1152
1153 /* VK_EXT_primitives_generated_query */
1154 .primitivesGeneratedQuery = true,
1155 .primitivesGeneratedQueryWithRasterizerDiscard = true,
1156 .primitivesGeneratedQueryWithNonZeroStreams = true,
1157
1158 /* VK_EXT_non_seamless_cube_map */
1159 .nonSeamlessCubeMap = true,
1160
1161 /* VK_EXT_border_color_swizzle */
1162 .borderColorSwizzle = true,
1163 .borderColorSwizzleFromImage = true,
1164
1165 /* VK_EXT_shader_module_identifier */
1166 .shaderModuleIdentifier = true,
1167
1168 /* VK_KHR_performance_query */
1169 .performanceCounterQueryPools = has_perf_query,
1170 .performanceCounterMultipleQueryPools = has_perf_query,
1171
1172 /* VK_EXT_attachment_feedback_loop_layout */
1173 .attachmentFeedbackLoopLayout = true,
1174
1175 /* VK_EXT_graphics_pipeline_library */
1176 .graphicsPipelineLibrary = true,
1177
1178 /* VK_EXT_extended_dynamic_state3 */
1179 .extendedDynamicState3TessellationDomainOrigin = true,
1180 .extendedDynamicState3PolygonMode = true,
1181 .extendedDynamicState3SampleMask = true,
1182 .extendedDynamicState3AlphaToCoverageEnable = !pdev->use_llvm,
1183 .extendedDynamicState3LogicOpEnable = true,
1184 .extendedDynamicState3LineStippleEnable = true,
1185 .extendedDynamicState3ColorBlendEnable = !pdev->use_llvm,
1186 .extendedDynamicState3DepthClipEnable = true,
1187 .extendedDynamicState3ConservativeRasterizationMode = pdev->info.gfx_level >= GFX9,
1188 .extendedDynamicState3DepthClipNegativeOneToOne = true,
1189 .extendedDynamicState3ProvokingVertexMode = true,
1190 .extendedDynamicState3DepthClampEnable = true,
1191 .extendedDynamicState3ColorWriteMask = !pdev->use_llvm,
1192 .extendedDynamicState3RasterizationSamples = true,
1193 .extendedDynamicState3ColorBlendEquation = !pdev->use_llvm,
1194 .extendedDynamicState3SampleLocationsEnable = pdev->info.gfx_level < GFX10,
1195 .extendedDynamicState3LineRasterizationMode = true,
1196 .extendedDynamicState3ExtraPrimitiveOverestimationSize = false,
1197 .extendedDynamicState3AlphaToOneEnable = !pdev->use_llvm,
1198 .extendedDynamicState3RasterizationStream = false,
1199 .extendedDynamicState3ColorBlendAdvanced = false,
1200 .extendedDynamicState3ViewportWScalingEnable = false,
1201 .extendedDynamicState3ViewportSwizzle = false,
1202 .extendedDynamicState3CoverageToColorEnable = false,
1203 .extendedDynamicState3CoverageToColorLocation = false,
1204 .extendedDynamicState3CoverageModulationMode = false,
1205 .extendedDynamicState3CoverageModulationTableEnable = false,
1206 .extendedDynamicState3CoverageModulationTable = false,
1207 .extendedDynamicState3CoverageReductionMode = false,
1208 .extendedDynamicState3RepresentativeFragmentTestEnable = false,
1209 .extendedDynamicState3ShadingRateImageEnable = false,
1210
1211 /* VK_EXT_descriptor_buffer */
1212 .descriptorBuffer = true,
1213 .descriptorBufferCaptureReplay = true,
1214 .descriptorBufferImageLayoutIgnored = true,
1215 .descriptorBufferPushDescriptors = true,
1216
1217 /* VK_AMD_shader_early_and_late_fragment_tests */
1218 .shaderEarlyAndLateFragmentTests = true,
1219
1220 /* VK_EXT_image_sliced_view_of_3d */
1221 .imageSlicedViewOf3D = true,
1222
1223 #ifdef RADV_USE_WSI_PLATFORM
1224 /* VK_EXT_swapchain_maintenance1 */
1225 .swapchainMaintenance1 = true,
1226 #endif
1227
1228 /* VK_EXT_attachment_feedback_loop_dynamic_state */
1229 .attachmentFeedbackLoopDynamicState = true,
1230
1231 /* VK_EXT_dynamic_rendering_unused_attachments */
1232 .dynamicRenderingUnusedAttachments = true,
1233
1234 /* VK_KHR_fragment_shader_barycentric */
1235 .fragmentShaderBarycentric = true,
1236
1237 /* VK_EXT_depth_bias_control */
1238 .depthBiasControl = true,
1239 .leastRepresentableValueForceUnormRepresentation = true,
1240 .floatRepresentation = true,
1241 .depthBiasExact = true,
1242
1243 /* VK_EXT_fragment_shader_interlock */
1244 .fragmentShaderSampleInterlock = has_fragment_shader_interlock,
1245 .fragmentShaderPixelInterlock = has_fragment_shader_interlock,
1246 .fragmentShaderShadingRateInterlock = false,
1247
1248 /* VK_EXT_pipeline_robustness */
1249 .pipelineRobustness = true,
1250
1251 /* VK_KHR_maintenance5 */
1252 .maintenance5 = true,
1253
1254 /* VK_KHR_cooperative_matrix */
1255 .cooperativeMatrix = radv_cooperative_matrix_enabled(pdev),
1256 .cooperativeMatrixRobustBufferAccess = radv_cooperative_matrix_enabled(pdev),
1257
1258 /* VK_EXT_image_compression_control */
1259 .imageCompressionControl = true,
1260
1261 /* VK_EXT_device_fault */
1262 .deviceFault = true,
1263 .deviceFaultVendorBinary = instance->debug_flags & RADV_DEBUG_HANG,
1264
1265 /* VK_KHR_depth_clamp_zero_one */
1266 .depthClampZeroOne = true,
1267
1268 /* VK_KHR_maintenance6 */
1269 .maintenance6 = true,
1270
1271 /* VK_KHR_shader_subgroup_rotate */
1272 .shaderSubgroupRotate = true,
1273 .shaderSubgroupRotateClustered = true,
1274
1275 /* VK_EXT_shader_object */
1276 .shaderObject = true,
1277
1278 /* VK_KHR_shader_expect_assume */
1279 .shaderExpectAssume = true,
1280
1281 /* VK_KHR_shader_maximal_reconvergence */
1282 .shaderMaximalReconvergence = true,
1283
1284 /* VK_KHR_shader_quad_control */
1285 .shaderQuadControl = true,
1286
1287 /* VK_EXT_address_binding_report */
1288 .reportAddressBinding = true,
1289
1290 /* VK_EXT_nested_command_buffer */
1291 .nestedCommandBuffer = true,
1292 .nestedCommandBufferRendering = true,
1293 .nestedCommandBufferSimultaneousUse = true,
1294
1295 /* VK_KHR_dynamic_rendering_local_read */
1296 .dynamicRenderingLocalRead = true,
1297
1298 /* VK_EXT_legacy_vertex_attributes */
1299 .legacyVertexAttributes = true,
1300
1301 /* VK_MESA_image_alignment_control */
1302 .imageAlignmentControl = true,
1303
1304 /* VK_EXT_shader_replicated_composites */
1305 .shaderReplicatedComposites = true,
1306
1307 /* VK_KHR_maintenance7 */
1308 .maintenance7 = true,
1309
1310 /* VK_KHR_video_maintenance1 */
1311 .videoMaintenance1 = true,
1312
1313 /* VK_KHR_pipeline_binary */
1314 .pipelineBinaries = true,
1315
1316 /* VK_KHR_shader_relaxed_extended_instruction */
1317 .shaderRelaxedExtendedInstruction = true,
1318
1319 /* VK_KHR_shader_float_controls2 */
1320 .shaderFloatControls2 = true,
1321
1322 /* VK_EXT_depth_clamp_control */
1323 .depthClampControl = true,
1324
1325 /* VK_EXT_device_generated_commands */
1326 .deviceGeneratedCommands = true,
1327 .dynamicGeneratedPipelineLayout = true,
1328
1329 /* VK_KHR_maintenance8 */
1330 .maintenance8 = true,
1331 };
1332 }
1333
1334 static size_t
radv_max_descriptor_set_size()1335 radv_max_descriptor_set_size()
1336 {
1337 /* make sure that the entire descriptor set is addressable with a signed
1338 * 32-bit int. So the sum of all limits scaled by descriptor size has to
1339 * be at most 2 GiB. the combined image & samples object count as one of
1340 * both. This limit is for the pipeline layout, not for the set layout, but
1341 * there is no set limit, so we just set a pipeline limit. I don't think
1342 * any app is going to hit this soon. */
1343 return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1344 (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1345 32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1346 32 /* sampler, largest when combined with image */ + 64 /* sampled image */ + 64 /* storage image */);
1347 }
1348
1349 static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device * pdev)1350 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdev)
1351 {
1352 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1353 uint32_t uniform_offset_alignment = instance->drirc.override_uniform_offset_alignment;
1354 if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1355 fprintf(stderr,
1356 "ERROR: invalid radv_override_uniform_offset_alignment setting %d:"
1357 "not a power of two\n",
1358 uniform_offset_alignment);
1359 uniform_offset_alignment = 0;
1360 }
1361
1362 /* Take at least the hardware limit. */
1363 return MAX2(uniform_offset_alignment, 4);
1364 }
1365
1366 static const char *
radv_get_compiler_string(struct radv_physical_device * pdev)1367 radv_get_compiler_string(struct radv_physical_device *pdev)
1368 {
1369 const struct radv_instance *instance = radv_physical_device_instance(pdev);
1370
1371 if (!pdev->use_llvm) {
1372 /* Some games like SotTR apply shader workarounds if the LLVM
1373 * version is too old or if the LLVM version string is
1374 * missing. This gives 2-5% performance with SotTR and ACO.
1375 */
1376 if (instance->drirc.report_llvm9_version_string) {
1377 return " (LLVM 9.0.1)";
1378 }
1379
1380 return "";
1381 }
1382
1383 #if AMD_LLVM_AVAILABLE
1384 return " (LLVM " MESA_LLVM_VERSION_STRING ")";
1385 #else
1386 unreachable("LLVM is not available");
1387 #endif
1388 }
1389
1390 static void
radv_get_physical_device_properties(struct radv_physical_device * pdev)1391 radv_get_physical_device_properties(struct radv_physical_device *pdev)
1392 {
1393 VkSampleCountFlags sample_counts = 0xf;
1394
1395 size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1396
1397 VkPhysicalDeviceType device_type;
1398 if (pdev->info.has_dedicated_vram) {
1399 device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1400 } else {
1401 device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1402 }
1403
1404 bool has_fp16 = pdev->info.has_packed_math_16bit;
1405
1406 VkShaderStageFlags taskmesh_stages =
1407 radv_taskmesh_enabled(pdev) ? VK_SHADER_STAGE_MESH_BIT_EXT | VK_SHADER_STAGE_TASK_BIT_EXT : 0;
1408 VkShaderStageFlags rt_stages = radv_enable_rt(pdev) ? RADV_RT_STAGE_BITS : 0;
1409
1410 bool accel_dot = pdev->info.has_accelerated_dot_product;
1411 bool gfx11plus = pdev->info.gfx_level >= GFX11;
1412
1413 VkExtent2D vrs_texel_extent = radv_vrs_attachment_enabled(pdev) ? (VkExtent2D){8, 8} : (VkExtent2D){0, 0};
1414 const int32_t max_viewport_size = pdev->info.gfx_level >= GFX12 ? 32768 : 16384;
1415
1416 uint64_t os_page_size = 4096;
1417 os_get_page_size(&os_page_size);
1418
1419 pdev->vk.properties = (struct vk_properties){
1420 #ifdef ANDROID_STRICT
1421 .apiVersion = RADV_API_VERSION,
1422 #else
1423 .apiVersion = pdev->info.gfx_level >= GFX8 ? RADV_API_VERSION : RADV_API_VERSION_1_3,
1424 #endif
1425 .driverVersion = vk_get_driver_version(),
1426 .vendorID = ATI_VENDOR_ID,
1427 .deviceID = pdev->info.pci_id,
1428 .deviceType = device_type,
1429 .maxImageDimension1D = (1 << 14),
1430 .maxImageDimension2D = (1 << 14),
1431 .maxImageDimension3D = (1 << 11),
1432 .maxImageDimensionCube = (1 << 14),
1433 .maxImageArrayLayers = (1 << 11),
1434 .maxTexelBufferElements = UINT32_MAX,
1435 .maxUniformBufferRange = UINT32_MAX,
1436 .maxStorageBufferRange = UINT32_MAX,
1437 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1438 .maxMemoryAllocationCount = UINT32_MAX,
1439 .maxSamplerAllocationCount = 64 * 1024,
1440 .bufferImageGranularity = 1,
1441 .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1442 .maxBoundDescriptorSets = MAX_SETS,
1443 .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1444 .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1445 .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1446 .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1447 .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1448 .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1449 .maxPerStageResources = max_descriptor_set_size,
1450 .maxDescriptorSetSamplers = max_descriptor_set_size,
1451 .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1452 .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1453 .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1454 .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1455 .maxDescriptorSetSampledImages = max_descriptor_set_size,
1456 .maxDescriptorSetStorageImages = max_descriptor_set_size,
1457 .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1458 .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1459 .maxVertexInputBindings = MAX_VBS,
1460 .maxVertexInputAttributeOffset = UINT32_MAX,
1461 .maxVertexInputBindingStride = 2048,
1462 .maxVertexOutputComponents = 128,
1463 .maxTessellationGenerationLevel = 64,
1464 .maxTessellationPatchSize = 32,
1465 .maxTessellationControlPerVertexInputComponents = 128,
1466 .maxTessellationControlPerVertexOutputComponents = 128,
1467 .maxTessellationControlPerPatchOutputComponents = 120,
1468 .maxTessellationControlTotalOutputComponents = 4096,
1469 .maxTessellationEvaluationInputComponents = 128,
1470 .maxTessellationEvaluationOutputComponents = 128,
1471 .maxGeometryShaderInvocations = 32,
1472 .maxGeometryInputComponents = 64,
1473 .maxGeometryOutputComponents = 128,
1474 .maxGeometryOutputVertices = 256,
1475 .maxGeometryTotalOutputComponents = 1024,
1476 .maxFragmentInputComponents = 128,
1477 .maxFragmentOutputAttachments = 8,
1478 .maxFragmentDualSrcAttachments = 1,
1479 .maxFragmentCombinedOutputResources = max_descriptor_set_size,
1480 .maxComputeSharedMemorySize = pdev->max_shared_size,
1481 .maxComputeWorkGroupCount = {4294967295, 65535, 65535},
1482 .maxComputeWorkGroupInvocations = 1024,
1483 .maxComputeWorkGroupSize = {1024, 1024, 1024},
1484 .subPixelPrecisionBits = 8,
1485 .subTexelPrecisionBits = 8,
1486 .mipmapPrecisionBits = 8,
1487 .maxDrawIndexedIndexValue = UINT32_MAX,
1488 .maxDrawIndirectCount = UINT32_MAX,
1489 .maxSamplerLodBias = 16,
1490 .maxSamplerAnisotropy = 16,
1491 .maxViewports = MAX_VIEWPORTS,
1492 .maxViewportDimensions = {max_viewport_size, max_viewport_size},
1493 .viewportBoundsRange = {-2 * max_viewport_size, 2 * max_viewport_size - 1},
1494 .viewportSubPixelBits = 8,
1495 .minMemoryMapAlignment = 4096, /* A page */
1496 .minTexelBufferOffsetAlignment = 4,
1497 .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdev),
1498 .minStorageBufferOffsetAlignment = 4,
1499 .minTexelOffset = -32,
1500 .maxTexelOffset = 31,
1501 .minTexelGatherOffset = -32,
1502 .maxTexelGatherOffset = 31,
1503 .minInterpolationOffset = -2,
1504 .maxInterpolationOffset = 2,
1505 .subPixelInterpolationOffsetBits = 8,
1506 .maxFramebufferWidth = MAX_FRAMEBUFFER_WIDTH,
1507 .maxFramebufferHeight = MAX_FRAMEBUFFER_HEIGHT,
1508 .maxFramebufferLayers = (1 << 10),
1509 .framebufferColorSampleCounts = sample_counts,
1510 .framebufferDepthSampleCounts = sample_counts,
1511 .framebufferStencilSampleCounts = sample_counts,
1512 .framebufferNoAttachmentsSampleCounts = sample_counts,
1513 .maxColorAttachments = MAX_RTS,
1514 .sampledImageColorSampleCounts = sample_counts,
1515 .sampledImageIntegerSampleCounts = sample_counts,
1516 .sampledImageDepthSampleCounts = sample_counts,
1517 .sampledImageStencilSampleCounts = sample_counts,
1518 .storageImageSampleCounts = sample_counts,
1519 .maxSampleMaskWords = 1,
1520 .timestampComputeAndGraphics = true,
1521 .timestampPeriod = 1000000.0 / pdev->info.clock_crystal_freq,
1522 .maxClipDistances = 8,
1523 .maxCullDistances = 8,
1524 .maxCombinedClipAndCullDistances = 8,
1525 .discreteQueuePriorities = 2,
1526 .pointSizeRange = {0.0, 8191.875},
1527 .lineWidthRange = {0.0, 8.0},
1528 .pointSizeGranularity = (1.0 / 8.0),
1529 .lineWidthGranularity = (1.0 / 8.0),
1530 .strictLines = false, /* FINISHME */
1531 .standardSampleLocations = true,
1532 .optimalBufferCopyOffsetAlignment = 1,
1533 .optimalBufferCopyRowPitchAlignment = 1,
1534 .nonCoherentAtomSize = 64,
1535 .sparseResidencyNonResidentStrict = pdev->info.family >= CHIP_POLARIS10,
1536 .sparseResidencyStandard2DBlockShape = pdev->info.family >= CHIP_POLARIS10,
1537 .sparseResidencyStandard3DBlockShape = pdev->info.gfx_level >= GFX9,
1538
1539 /* Vulkan 1.1 */
1540 .driverID = VK_DRIVER_ID_MESA_RADV,
1541 .deviceLUIDValid = false, /* The LUID is for Windows. */
1542 .deviceNodeMask = 0,
1543 .subgroupSize = RADV_SUBGROUP_SIZE,
1544 .subgroupSupportedStages =
1545 VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages | rt_stages,
1546 .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1547 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1548 VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1549 VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
1550 VK_SUBGROUP_FEATURE_ROTATE_BIT | VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT,
1551 .subgroupQuadOperationsInAllStages = true,
1552 .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
1553 .maxMultiviewViewCount = MAX_VIEWS,
1554 .maxMultiviewInstanceIndex = INT_MAX,
1555 .protectedNoFault = false,
1556 .maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS,
1557 .maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1558
1559 /* Vulkan 1.2 */
1560 .conformanceVersion = radv_get_conformance_version(pdev),
1561 /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1562 * controlled by the same config register.
1563 */
1564 .denormBehaviorIndependence =
1565 has_fp16 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
1566 .roundingModeIndependence =
1567 has_fp16 ? VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY : VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
1568 /* With LLVM, do not allow both preserving and flushing denorms because
1569 * different shaders in the same pipeline can have different settings and
1570 * this won't work for merged shaders. To make it work, this requires LLVM
1571 * support for changing the register. The same logic applies for the
1572 * rounding modes because they are configured with the same config
1573 * register.
1574 */
1575 .shaderDenormFlushToZeroFloat32 = true,
1576 .shaderDenormPreserveFloat32 = !pdev->use_llvm,
1577 .shaderRoundingModeRTEFloat32 = true,
1578 .shaderRoundingModeRTZFloat32 = !pdev->use_llvm,
1579 .shaderSignedZeroInfNanPreserveFloat32 = true,
1580 .shaderDenormFlushToZeroFloat16 = has_fp16 && !pdev->use_llvm,
1581 .shaderDenormPreserveFloat16 = has_fp16,
1582 .shaderRoundingModeRTEFloat16 = has_fp16,
1583 .shaderRoundingModeRTZFloat16 = has_fp16 && !pdev->use_llvm,
1584 .shaderSignedZeroInfNanPreserveFloat16 = has_fp16,
1585 .shaderDenormFlushToZeroFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm,
1586 .shaderDenormPreserveFloat64 = pdev->info.gfx_level >= GFX8,
1587 .shaderRoundingModeRTEFloat64 = pdev->info.gfx_level >= GFX8,
1588 .shaderRoundingModeRTZFloat64 = pdev->info.gfx_level >= GFX8 && !pdev->use_llvm,
1589 .shaderSignedZeroInfNanPreserveFloat64 = pdev->info.gfx_level >= GFX8,
1590 .maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64,
1591 .shaderUniformBufferArrayNonUniformIndexingNative = false,
1592 .shaderSampledImageArrayNonUniformIndexingNative = false,
1593 .shaderStorageBufferArrayNonUniformIndexingNative = false,
1594 .shaderStorageImageArrayNonUniformIndexingNative = false,
1595 .shaderInputAttachmentArrayNonUniformIndexingNative = false,
1596 .robustBufferAccessUpdateAfterBind = true,
1597 .quadDivergentImplicitLod = false,
1598 .maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size,
1599 .maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size,
1600 .maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size,
1601 .maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size,
1602 .maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size,
1603 .maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size,
1604 .maxPerStageUpdateAfterBindResources = max_descriptor_set_size,
1605 .maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size,
1606 .maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size,
1607 .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1608 .maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size,
1609 .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1610 .maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size,
1611 .maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size,
1612 .maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size,
1613 /* We support all of the depth resolve modes */
1614 .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_AVERAGE_BIT |
1615 VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1616 /* Average doesn't make sense for stencil so we don't support that */
1617 .supportedStencilResolveModes =
1618 VK_RESOLVE_MODE_SAMPLE_ZERO_BIT | VK_RESOLVE_MODE_MIN_BIT | VK_RESOLVE_MODE_MAX_BIT,
1619 .independentResolveNone = true,
1620 .independentResolve = true,
1621 /* GFX6-8 only support single channel min/max filter. */
1622 .filterMinmaxImageComponentMapping = pdev->info.gfx_level >= GFX9,
1623 .filterMinmaxSingleComponentFormats = true,
1624 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
1625 .framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT,
1626
1627 /* Vulkan 1.3 */
1628 .minSubgroupSize = pdev->info.gfx_level >= GFX10 ? 32 : 64,
1629 .maxSubgroupSize = 64,
1630 .maxComputeWorkgroupSubgroups = UINT32_MAX,
1631 .requiredSubgroupSizeStages = pdev->info.gfx_level >= GFX10 ? VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages : 0,
1632 .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
1633 .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS,
1634 .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS,
1635 .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT,
1636 .maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT,
1637 .maxInlineUniformTotalSize = UINT16_MAX,
1638 .integerDotProduct8BitUnsignedAccelerated = accel_dot,
1639 .integerDotProduct8BitSignedAccelerated = accel_dot,
1640 .integerDotProduct8BitMixedSignednessAccelerated = accel_dot && gfx11plus,
1641 .integerDotProduct4x8BitPackedUnsignedAccelerated = accel_dot,
1642 .integerDotProduct4x8BitPackedSignedAccelerated = accel_dot,
1643 .integerDotProduct4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus,
1644 .integerDotProduct16BitUnsignedAccelerated = accel_dot && !gfx11plus,
1645 .integerDotProduct16BitSignedAccelerated = accel_dot && !gfx11plus,
1646 .integerDotProduct16BitMixedSignednessAccelerated = false,
1647 .integerDotProduct32BitUnsignedAccelerated = false,
1648 .integerDotProduct32BitSignedAccelerated = false,
1649 .integerDotProduct32BitMixedSignednessAccelerated = false,
1650 .integerDotProduct64BitUnsignedAccelerated = false,
1651 .integerDotProduct64BitSignedAccelerated = false,
1652 .integerDotProduct64BitMixedSignednessAccelerated = false,
1653 .integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel_dot,
1654 .integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel_dot,
1655 .integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = accel_dot && gfx11plus,
1656 .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel_dot,
1657 .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel_dot,
1658 .integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = accel_dot && gfx11plus,
1659 .integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel_dot && !gfx11plus,
1660 .integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel_dot && !gfx11plus,
1661 .integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false,
1662 .integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false,
1663 .integerDotProductAccumulatingSaturating32BitSignedAccelerated = false,
1664 .integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false,
1665 .integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false,
1666 .integerDotProductAccumulatingSaturating64BitSignedAccelerated = false,
1667 .integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false,
1668 .storageTexelBufferOffsetAlignmentBytes = 4,
1669 .storageTexelBufferOffsetSingleTexelAlignment = true,
1670 .uniformTexelBufferOffsetAlignmentBytes = 4,
1671 .uniformTexelBufferOffsetSingleTexelAlignment = true,
1672 .maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1673
1674 /* Vulkan 1.4 */
1675 .lineSubPixelPrecisionBits = 4,
1676 .maxVertexAttribDivisor = UINT32_MAX,
1677 .supportsNonZeroFirstInstance = true,
1678 .maxPushDescriptors = MAX_PUSH_DESCRIPTORS,
1679 .dynamicRenderingLocalReadDepthStencilAttachments = true,
1680 .dynamicRenderingLocalReadMultisampledAttachments = true,
1681 .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
1682 .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
1683 .depthStencilSwizzleOneSupport = true,
1684 .polygonModePointSize = true,
1685 .nonStrictSinglePixelWideLinesUseParallelogram = true,
1686 .nonStrictWideLinesUseParallelogram = true,
1687 .blockTexelViewCompatibleMultipleLayers = true,
1688 .maxCombinedImageSamplerDescriptorCount = 1,
1689 .fragmentShadingRateClampCombinerInputs = true,
1690 .defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1691 .defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1692 .defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
1693 .defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2,
1694 .copySrcLayoutCount = 0,
1695 .pCopySrcLayouts = NULL,
1696 .copyDstLayoutCount = 0,
1697 .pCopyDstLayouts = NULL,
1698 .identicalMemoryTypeRequirements = false,
1699
1700 /* VK_EXT_discard_rectangles */
1701 .maxDiscardRectangles = MAX_DISCARD_RECTANGLES,
1702
1703 /* VK_EXT_external_memory_host */
1704 .minImportedHostPointerAlignment = 4096,
1705
1706 /* VK_AMD_shader_core_properties */
1707 /* Shader engines. */
1708 .shaderEngineCount = pdev->info.max_se,
1709 .shaderArraysPerEngineCount = pdev->info.max_sa_per_se,
1710 .computeUnitsPerShaderArray = pdev->info.min_good_cu_per_sa,
1711 .simdPerComputeUnit = pdev->info.num_simd_per_compute_unit,
1712 .wavefrontsPerSimd = pdev->info.max_waves_per_simd,
1713 .wavefrontSize = 64,
1714
1715 /* SGPR. */
1716 .sgprsPerSimd = pdev->info.num_physical_sgprs_per_simd,
1717 .minSgprAllocation = pdev->info.min_sgpr_alloc,
1718 .maxSgprAllocation = pdev->info.max_sgpr_alloc,
1719 .sgprAllocationGranularity = pdev->info.sgpr_alloc_granularity,
1720
1721 /* VGPR. */
1722 .vgprsPerSimd = pdev->info.num_physical_wave64_vgprs_per_simd,
1723 .minVgprAllocation = pdev->info.min_wave64_vgpr_alloc,
1724 .maxVgprAllocation = pdev->info.max_vgpr_alloc,
1725 .vgprAllocationGranularity = pdev->info.wave64_vgpr_alloc_granularity,
1726
1727 /* VK_AMD_shader_core_properties2 */
1728 .shaderCoreFeatures = 0,
1729 .activeComputeUnitCount = pdev->info.num_cu,
1730
1731 /* VK_EXT_conservative_rasterization */
1732 .primitiveOverestimationSize = 0,
1733 .maxExtraPrimitiveOverestimationSize = 0,
1734 .extraPrimitiveOverestimationSizeGranularity = 0,
1735 .primitiveUnderestimation = true,
1736 .conservativePointAndLineRasterization = false,
1737 .degenerateTrianglesRasterized = true,
1738 .degenerateLinesRasterized = false,
1739 .fullyCoveredFragmentShaderInputVariable = true,
1740 .conservativeRasterizationPostDepthCoverage = false,
1741
1742 #ifndef _WIN32
1743 /* VK_EXT_pci_bus_info */
1744 .pciDomain = pdev->bus_info.domain,
1745 .pciBus = pdev->bus_info.bus,
1746 .pciDevice = pdev->bus_info.dev,
1747 .pciFunction = pdev->bus_info.func,
1748 #endif
1749
1750 /* VK_EXT_transform_feedback */
1751 .maxTransformFeedbackStreams = MAX_SO_STREAMS,
1752 .maxTransformFeedbackBuffers = MAX_SO_BUFFERS,
1753 .maxTransformFeedbackBufferSize = UINT32_MAX,
1754 .maxTransformFeedbackStreamDataSize = 512,
1755 .maxTransformFeedbackBufferDataSize = 512,
1756 .maxTransformFeedbackBufferDataStride = 512,
1757 .transformFeedbackQueries = true,
1758 .transformFeedbackStreamsLinesTriangles = true,
1759 .transformFeedbackRasterizationStreamSelect = false,
1760 .transformFeedbackDraw = true,
1761
1762 /* VK_EXT_sample_locations */
1763 .sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT,
1764 .maxSampleLocationGridSize = (VkExtent2D){2, 2},
1765 .sampleLocationCoordinateRange = {0.0f, 0.9375f},
1766 .sampleLocationSubPixelBits = 4,
1767 .variableSampleLocations = true,
1768
1769 /* VK_EXT_robustness2 */
1770 .robustStorageBufferAccessSizeAlignment = 4,
1771 .robustUniformBufferAccessSizeAlignment = 4,
1772
1773 /* VK_EXT_custom_border_color */
1774 .maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT,
1775
1776 /* VK_KHR_fragment_shading_rate */
1777 .minFragmentShadingRateAttachmentTexelSize = vrs_texel_extent,
1778 .maxFragmentShadingRateAttachmentTexelSize = vrs_texel_extent,
1779 .maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1,
1780 .primitiveFragmentShadingRateWithMultipleViewports = true,
1781 .layeredShadingRateAttachments = false, /* TODO */
1782 .fragmentShadingRateNonTrivialCombinerOps = true,
1783 .maxFragmentSize = (VkExtent2D){2, 2},
1784 .maxFragmentSizeAspectRatio = 2,
1785 .maxFragmentShadingRateCoverageSamples = pdev->info.gfx_level >= GFX12 ? 16 : 32,
1786 .maxFragmentShadingRateRasterizationSamples =
1787 pdev->info.gfx_level >= GFX12 ? VK_SAMPLE_COUNT_4_BIT : VK_SAMPLE_COUNT_8_BIT,
1788 .fragmentShadingRateWithShaderDepthStencilWrites = !pdev->info.has_vrs_ds_export_bug,
1789 .fragmentShadingRateWithSampleMask = true,
1790 .fragmentShadingRateWithShaderSampleMask = false,
1791 .fragmentShadingRateWithConservativeRasterization = true,
1792 .fragmentShadingRateWithFragmentShaderInterlock = pdev->info.gfx_level >= GFX11 && radv_has_pops(pdev),
1793 .fragmentShadingRateWithCustomSampleLocations = false,
1794 .fragmentShadingRateStrictMultiplyCombiner = true,
1795
1796 /* VK_EXT_provoking_vertex */
1797 .provokingVertexModePerPipeline = true,
1798 .transformFeedbackPreservesTriangleFanProvokingVertex = true,
1799
1800 /* VK_KHR_acceleration_structure */
1801 .maxGeometryCount = (1 << 24) - 1,
1802 .maxInstanceCount = (1 << 24) - 1,
1803 .maxPrimitiveCount = (1 << 29) - 1,
1804 .maxPerStageDescriptorAccelerationStructures = max_descriptor_set_size,
1805 .maxPerStageDescriptorUpdateAfterBindAccelerationStructures = max_descriptor_set_size,
1806 .maxDescriptorSetAccelerationStructures = max_descriptor_set_size,
1807 .maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size,
1808 .minAccelerationStructureScratchOffsetAlignment = 128,
1809
1810 /* VK_EXT_multi_draw */
1811 .maxMultiDrawCount = 2048,
1812
1813 /* VK_KHR_ray_tracing_pipeline */
1814 .shaderGroupHandleSize = RADV_RT_HANDLE_SIZE,
1815 .maxRayRecursionDepth = 31, /* Minimum allowed for DXR. */
1816 .maxShaderGroupStride = 16384, /* dummy */
1817 /* This isn't strictly necessary, but Doom Eternal breaks if the
1818 * alignment is any lower. */
1819 .shaderGroupBaseAlignment = RADV_RT_HANDLE_SIZE,
1820 .shaderGroupHandleCaptureReplaySize = sizeof(struct radv_rt_capture_replay_handle),
1821 .maxRayDispatchInvocationCount = 1024 * 1024 * 64,
1822 .shaderGroupHandleAlignment = 16,
1823 .maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE,
1824
1825 /* VK_KHR_performance_query */
1826 .allowCommandBufferQueryCopies = false,
1827
1828 /* VK_EXT_graphics_pipeline_library */
1829 .graphicsPipelineLibraryFastLinking = true,
1830 .graphicsPipelineLibraryIndependentInterpolationDecoration = true,
1831
1832 /* VK_EXT_mesh_shader */
1833 .maxTaskWorkGroupTotalCount = 4194304, /* 2^22 min required */
1834 .maxTaskWorkGroupCount = {65535, 65535, 65535},
1835 .maxTaskWorkGroupInvocations = 1024,
1836 .maxTaskWorkGroupSize = {1024, 1024, 1024},
1837 .maxTaskPayloadSize = 16384, /* 16K min required */
1838 .maxTaskSharedMemorySize = 65536,
1839 .maxTaskPayloadAndSharedMemorySize = 65536,
1840
1841 .maxMeshWorkGroupTotalCount = 4194304, /* 2^22 min required */
1842 .maxMeshWorkGroupCount = {65535, 65535, 65535},
1843 .maxMeshWorkGroupInvocations = 256, /* Max NGG HW limit */
1844 .maxMeshWorkGroupSize = {256, 256, 256},
1845 .maxMeshOutputMemorySize = 32 * 1024, /* 32K min required */
1846 .maxMeshSharedMemorySize = 28672, /* 28K min required */
1847 .maxMeshPayloadAndSharedMemorySize = 16384 + 28672, /* 28K min required */
1848 .maxMeshPayloadAndOutputMemorySize = 16384 + 32 * 1024, /* 47K min required */
1849 .maxMeshOutputComponents = 128, /* 32x vec4 min required */
1850 .maxMeshOutputVertices = 256,
1851 .maxMeshOutputPrimitives = 256,
1852 .maxMeshOutputLayers = 8,
1853 .maxMeshMultiviewViewCount = MAX_VIEWS,
1854 .meshOutputPerVertexGranularity = 1,
1855 .meshOutputPerPrimitiveGranularity = 1,
1856
1857 .maxPreferredTaskWorkGroupInvocations = 64,
1858 .maxPreferredMeshWorkGroupInvocations = 128,
1859 .prefersLocalInvocationVertexOutput = true,
1860 .prefersLocalInvocationPrimitiveOutput = true,
1861 .prefersCompactVertexOutput = true,
1862 .prefersCompactPrimitiveOutput = false,
1863
1864 /* VK_EXT_extended_dynamic_state3 */
1865 .dynamicPrimitiveTopologyUnrestricted = false,
1866
1867 /* VK_EXT_descriptor_buffer */
1868 .combinedImageSamplerDescriptorSingleArray = true,
1869 .bufferlessPushDescriptors = true,
1870 .allowSamplerImageViewPostSubmitCreation = false,
1871 .descriptorBufferOffsetAlignment = 4,
1872 .maxDescriptorBufferBindings = MAX_SETS,
1873 .maxResourceDescriptorBufferBindings = MAX_SETS,
1874 .maxSamplerDescriptorBufferBindings = MAX_SETS,
1875 .maxEmbeddedImmutableSamplerBindings = MAX_SETS,
1876 .maxEmbeddedImmutableSamplers = radv_max_descriptor_set_size(),
1877 /* No data required for capture/replay but these values need to be non-zero. */
1878 .bufferCaptureReplayDescriptorDataSize = 1,
1879 .imageCaptureReplayDescriptorDataSize = 1,
1880 .imageViewCaptureReplayDescriptorDataSize = 1,
1881 .samplerCaptureReplayDescriptorDataSize = 1,
1882 .accelerationStructureCaptureReplayDescriptorDataSize = 1,
1883 .samplerDescriptorSize = 16,
1884 .combinedImageSamplerDescriptorSize = 96,
1885 .sampledImageDescriptorSize = 64,
1886 .storageImageDescriptorSize = 32,
1887 .uniformTexelBufferDescriptorSize = 16,
1888 .robustUniformTexelBufferDescriptorSize = 16,
1889 .storageTexelBufferDescriptorSize = 16,
1890 .robustStorageTexelBufferDescriptorSize = 16,
1891 .uniformBufferDescriptorSize = 16,
1892 .robustUniformBufferDescriptorSize = 16,
1893 .storageBufferDescriptorSize = 16,
1894 .robustStorageBufferDescriptorSize = 16,
1895 .inputAttachmentDescriptorSize = 64,
1896 .accelerationStructureDescriptorSize = 16,
1897 .maxSamplerDescriptorBufferRange = UINT32_MAX,
1898 .maxResourceDescriptorBufferRange = UINT32_MAX,
1899 .samplerDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1900 .resourceDescriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1901 .descriptorBufferAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE,
1902
1903 /* VK_KHR_fragment_shader_barycentric */
1904 .triStripVertexOrderIndependentOfProvokingVertex = false,
1905
1906 /* VK_EXT_pipeline_robustness */
1907 .defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1908 .defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS,
1909 .defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DISABLED,
1910 .defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2,
1911
1912 /* VK_KHR_cooperative_matrix */
1913 .cooperativeMatrixSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT,
1914
1915 /* VK_EXT_map_memory_placed */
1916 .minPlacedMemoryMapAlignment = os_page_size,
1917
1918 /* VK_EXT_nested_command_buffer */
1919 .maxCommandBufferNestingLevel = UINT32_MAX,
1920
1921 /* VK_EXT_legacy_vertex_attributes */
1922 .nativeUnalignedPerformance = false,
1923
1924 /* VK_MESA_image_alignment_control */
1925 .supportedImageAlignmentMask = (4 * 1024) | (64 * 1024) | (gfx11plus ? 256 * 1024 : 0),
1926
1927 /* VK_KHR_maintenance7 */
1928 .robustFragmentShadingRateAttachmentAccess = true,
1929 .separateDepthStencilAttachmentAccess = true,
1930 .maxDescriptorSetTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1931 .maxDescriptorSetTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1932 .maxDescriptorSetTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS,
1933 .maxDescriptorSetUpdateAfterBindTotalUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1934 .maxDescriptorSetUpdateAfterBindTotalStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1935 .maxDescriptorSetUpdateAfterBindTotalBuffersDynamic = MAX_DYNAMIC_BUFFERS,
1936
1937 /* VK_KHR_pipeline_binary */
1938 .pipelineBinaryInternalCache = true,
1939 .pipelineBinaryInternalCacheControl = true,
1940 .pipelineBinaryPrefersInternalCache = false,
1941 .pipelineBinaryPrecompiledInternalCache = false,
1942 .pipelineBinaryCompressedData = false,
1943
1944 /* VK_KHR_compute_shader_derivatives */
1945 .meshAndTaskShaderDerivatives = radv_taskmesh_enabled(pdev),
1946
1947 /* VK_EXT_device_generated_commands */
1948 .maxIndirectPipelineCount = 4096,
1949 .maxIndirectShaderObjectCount = 4096,
1950 .maxIndirectSequenceCount = 1048576,
1951 .maxIndirectCommandsTokenCount = 128,
1952 .maxIndirectCommandsTokenOffset = 2047,
1953 .maxIndirectCommandsIndirectStride = 2048,
1954 .supportedIndirectCommandsInputModes = VK_INDIRECT_COMMANDS_INPUT_MODE_VULKAN_INDEX_BUFFER_EXT |
1955 VK_INDIRECT_COMMANDS_INPUT_MODE_DXGI_INDEX_BUFFER_EXT,
1956 .supportedIndirectCommandsShaderStages =
1957 VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT | taskmesh_stages | rt_stages,
1958 .supportedIndirectCommandsShaderStagesPipelineBinding = VK_SHADER_STAGE_COMPUTE_BIT,
1959 .supportedIndirectCommandsShaderStagesShaderBinding = VK_SHADER_STAGE_COMPUTE_BIT,
1960 .deviceGeneratedCommandsTransformFeedback = true,
1961 .deviceGeneratedCommandsMultiDrawIndirectCount = true,
1962 };
1963
1964 struct vk_properties *p = &pdev->vk.properties;
1965
1966 strcpy(p->deviceName, pdev->marketing_name);
1967 memcpy(p->pipelineCacheUUID, pdev->cache_uuid, VK_UUID_SIZE);
1968
1969 memcpy(p->deviceUUID, pdev->device_uuid, VK_UUID_SIZE);
1970 memcpy(p->driverUUID, pdev->driver_uuid, VK_UUID_SIZE);
1971 memset(p->deviceLUID, 0, VK_LUID_SIZE);
1972
1973 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1974 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1975 radv_get_compiler_string(pdev));
1976
1977 memset(p->optimalTilingLayoutUUID, 0, sizeof(p->optimalTilingLayoutUUID));
1978
1979 /* VK_EXT_physical_device_drm */
1980 #ifndef _WIN32
1981 if (pdev->available_nodes & (1 << DRM_NODE_PRIMARY)) {
1982 p->drmHasPrimary = true;
1983 p->drmPrimaryMajor = (int64_t)major(pdev->primary_devid);
1984 p->drmPrimaryMinor = (int64_t)minor(pdev->primary_devid);
1985 } else {
1986 p->drmHasPrimary = false;
1987 }
1988 if (pdev->available_nodes & (1 << DRM_NODE_RENDER)) {
1989 p->drmHasRender = true;
1990 p->drmRenderMajor = (int64_t)major(pdev->render_devid);
1991 p->drmRenderMinor = (int64_t)minor(pdev->render_devid);
1992 } else {
1993 p->drmHasRender = false;
1994 }
1995 #endif
1996
1997 /* VK_EXT_shader_module_identifier */
1998 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) == sizeof(p->shaderModuleIdentifierAlgorithmUUID));
1999 memcpy(p->shaderModuleIdentifierAlgorithmUUID, vk_shaderModuleIdentifierAlgorithmUUID,
2000 sizeof(p->shaderModuleIdentifierAlgorithmUUID));
2001
2002 /* VK_EXT_shader_object */
2003 radv_device_get_cache_uuid(pdev, p->shaderBinaryUUID);
2004 p->shaderBinaryVersion = 1;
2005 }
2006
/* Create and initialize a radv_physical_device for the given DRM device, or a
 * null (no-HW) device when drm_device is NULL (always the case on Windows).
 * On success the new device is returned in *pdev_out and takes ownership of
 * the opened fd/master_fd; on failure everything acquired so far is unwound
 * through the fail_* labels at the bottom. */
static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **pdev_out)
{
   VkResult result;
   int fd = -1;        /* render-node FD; stays -1 for the null device */
   int master_fd = -1; /* primary-node FD; only opened for VK_KHR_display */

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   bool is_virtio = false;
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "Could not open device %s: %m", path);
      }

      /* Reject nodes not driven by amdgpu (or by virtio_gpu when virtio
       * native-context support was built in). */
      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Could not get the kernel driver version for device %s: %m", path);
      }

      if (!strcmp(version->name, "amdgpu")) {
         /* nothing to do. */
      } else
#ifdef HAVE_AMDGPU_VIRTIO
         if (!strcmp(version->name, "virtio_gpu")) {
         is_virtio = true;
      } else
#endif
      {
         drmFreeVersion(version);
         close(fd);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "Device '%s' is not using the AMDGPU kernel driver: %m", path);
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         fprintf(stderr, "radv: info: Found compatible device '%s'.\n", path);
   }
#endif

   struct radv_physical_device *pdev =
      vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*pdev), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdev) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&pdev->vk, &instance->vk, NULL, NULL, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   /* Pick the winsys: amdgpu for a real DRM device, the null winsys otherwise
    * (and always on Windows). */
#ifdef _WIN32
   pdev->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      bool reserve_vmid = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;

      pdev->ws =
         radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, reserve_vmid, is_virtio);
   } else {
      pdev->ws = radv_null_winsys_create();
   }
#endif

   if (!pdev->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

   pdev->vk.supported_sync_types = pdev->ws->get_sync_types(pdev->ws);

#ifndef _WIN32
   /* VK_KHR_display needs the primary node; keep it only if the kernel
    * reports GPU acceleration working on it, otherwise fall back to -1. */
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drm_ioctl_write(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   pdev->master_fd = master_fd;
   pdev->local_fd = fd;
   pdev->ws->query_info(pdev->ws, &pdev->info);
   /* Null device: the family does not come from real hardware. */
   pdev->info.family_overridden = drm_device == NULL;

   if (drm_device) {
      pdev->addrlib = ac_addrlib_create(&pdev->info, &pdev->info.max_alignment);
      if (!pdev->addrlib) {
         result = VK_ERROR_INITIALIZATION_FAILED;
         goto fail_wsi;
      }
   }

   pdev->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#if !AMD_LLVM_AVAILABLE
   if (pdev->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

   /* ETC2/ASTC emulation: unconditional policy on Android, opt-in via drirc
    * elsewhere; only enabled when the HW lacks native support (ETC2). */
#if DETECT_OS_ANDROID
   pdev->emulate_etc2 = !pdev->info.has_etc_support;
   pdev->emulate_astc = true;
#else
   pdev->emulate_etc2 = !pdev->info.has_etc_support && instance->drirc.vk_require_etc2;
   pdev->emulate_astc = instance->drirc.vk_require_astc;
#endif

   snprintf(pdev->name, sizeof(pdev->name), "AMD RADV %s%s", pdev->info.name, radv_get_compiler_string(pdev));

   const char *marketing_name = pdev->ws->get_chip_name(pdev->ws);
   /* NOTE(review): buffer size taken from the sibling `name` field, not from
    * `marketing_name` itself — harmless only if both fields have the same
    * size; confirm against the struct declaration. */
   snprintf(pdev->marketing_name, sizeof(pdev->name), "%s (RADV %s%s)", marketing_name ? marketing_name : "AMD Unknown",
            pdev->info.name, radv_get_compiler_string(pdev));

   if (pdev->info.gfx_level >= GFX12)
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&pdev->driver_uuid);
   radv_get_device_uuid(&pdev->info, &pdev->device_uuid);

   pdev->dcc_msaa_allowed = (instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   pdev->use_fmask = pdev->info.gfx_level < GFX11 && !(instance->debug_flags & RADV_DEBUG_NO_FMASK);

   /* NGG: optional on GFX10/10.3 (never on Navi14, can be disabled by debug
    * flag), unconditional on GFX11+. */
   pdev->use_ngg = (pdev->info.gfx_level >= GFX10 && pdev->info.family != CHIP_NAVI14 &&
                    !(instance->debug_flags & RADV_DEBUG_NO_NGG)) ||
                   pdev->info.gfx_level >= GFX11;

   /* TODO: Investigate if NGG culling helps on GFX11. */
   pdev->use_ngg_culling = pdev->use_ngg && pdev->info.max_render_backends > 1 &&
                           (pdev->info.gfx_level == GFX10_3 || pdev->info.gfx_level == GFX10 ||
                            (instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
                           !(instance->debug_flags & RADV_DEBUG_NO_NGGC);

   pdev->use_ngg_streamout = pdev->info.gfx_level >= GFX11;

   pdev->emulate_ngg_gs_query_pipeline_stat = pdev->use_ngg && pdev->info.gfx_level < GFX11;

   pdev->mesh_fast_launch_2 = pdev->info.gfx_level >= GFX11;

   pdev->emulate_mesh_shader_queries = pdev->info.gfx_level == GFX10_3;

   /* Determine the number of threads per wave for all stages. */
   pdev->cs_wave_size = 64;
   pdev->ps_wave_size = 64;
   pdev->ge_wave_size = 64;
   pdev->rt_wave_size = 64;

   if (pdev->info.gfx_level >= GFX10) {
      if (instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         pdev->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         pdev->ps_wave_size = 32;

      if (instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         pdev->ge_wave_size = 32;

      /* Default to 32 on RDNA1-2 as that gives better perf due to less issues with divergence.
       * However, on RDNA3+ default to wave64 as implicit dual issuing is likely better than
       * wave32 VOPD for VALU dependent code.
       * (as well as the SALU count becoming more problematic with wave32)
       */
      if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_32 || pdev->info.gfx_level < GFX11)
         pdev->rt_wave_size = 32;

      /* Explicit wave64 request (perftest flag or drirc) wins over the above. */
      if (instance->perftest_flags & RADV_PERFTEST_RT_WAVE_64 || instance->drirc.force_rt_wave64)
         pdev->rt_wave_size = 64;
   }

   radv_probe_video_decode(pdev);
   radv_probe_video_encode(pdev);

   pdev->max_shared_size = pdev->info.gfx_level >= GFX7 ? 65536 : 32768;

   radv_physical_device_init_mem_types(pdev);

   radv_physical_device_get_supported_extensions(pdev, &pdev->vk.supported_extensions);
   radv_physical_device_get_features(pdev, &pdev->vk.supported_features);

   radv_get_nir_options(pdev);

#ifndef _WIN32
   /* VK_EXT_physical_device_drm: record dev_t major/minor of the primary and
    * render nodes (consumed later when filling the drm properties). */
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      pdev->available_nodes = drm_device->available_nodes;
      pdev->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_perfcounters;
      }
      pdev->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_perfcounters;
      }
      pdev->render_devid = render_stat.st_rdev;
   }
#endif

   radv_physical_device_init_cache_key(pdev);

   if (radv_device_get_cache_uuid(pdev, pdev->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The gpu id is already embedded in the uuid so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   mesa_bytes_to_hex(buf, pdev->cache_uuid, VK_UUID_SIZE);
   pdev->vk.disk_cache = disk_cache_create(pdev->name, buf, 0);

   pdev->disk_cache_meta = disk_cache_create_custom(pdev->name, buf, 0, "radv_builtin_shaders", 1024 * 32 /* 32MiB */);

   radv_get_physical_device_properties(pdev);

   if ((instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&pdev->info, stdout);

   radv_init_physical_device_decoder(pdev);
   radv_init_physical_device_encoder(pdev);

   radv_physical_device_init_queue_table(pdev);

   /* We don't check the error code, but later check if it is initialized. */
   ac_init_perfcounters(&pdev->info, false, false, &pdev->ac_perfcounters);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(pdev);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_perfcounters;
   }

   pdev->gs_table_depth = ac_get_gs_table_depth(pdev->info.gfx_level, pdev->info.family);

   ac_get_hs_info(&pdev->info, &pdev->hs);
   ac_get_task_info(&pdev->info, &pdev->task_info);
   radv_get_binning_settings(pdev, &pdev->binning_settings);

   /* Pick the tessellation distribution mode for chips that support it. */
   if (pdev->info.has_distributed_tess) {
      if (pdev->info.family == CHIP_FIJI || pdev->info.family >= CHIP_POLARIS10)
         pdev->tess_distribution_mode = V_028B6C_TRAPEZOIDS;
      else
         pdev->tess_distribution_mode = V_028B6C_DONUTS;
   } else {
      pdev->tess_distribution_mode = V_028B6C_NO_DIST;
   }

   *pdev_out = pdev;

   return VK_SUCCESS;

   /* Unwind in reverse order of acquisition; labels are entered at the point
    * matching how far initialization got. */
fail_perfcounters:
   ac_destroy_perfcounters(&pdev->ac_perfcounters);
   disk_cache_destroy(pdev->vk.disk_cache);
   disk_cache_destroy(pdev->disk_cache_meta);
fail_wsi:
   if (pdev->addrlib)
      ac_addrlib_destroy(pdev->addrlib);
   pdev->ws->destroy(pdev->ws);
fail_base:
   vk_physical_device_finish(&pdev->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, pdev);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}
2319
2320 VkResult
create_null_physical_device(struct vk_instance * vk_instance)2321 create_null_physical_device(struct vk_instance *vk_instance)
2322 {
2323 struct radv_instance *instance = container_of(vk_instance, struct radv_instance, vk);
2324 struct radv_physical_device *pdev;
2325
2326 VkResult result = radv_physical_device_try_create(instance, NULL, &pdev);
2327 if (result != VK_SUCCESS)
2328 return result;
2329
2330 list_addtail(&pdev->vk.link, &instance->vk.physical_devices.list);
2331 return VK_SUCCESS;
2332 }
2333
2334 VkResult
create_drm_physical_device(struct vk_instance * vk_instance,struct _drmDevice * device,struct vk_physical_device ** out)2335 create_drm_physical_device(struct vk_instance *vk_instance, struct _drmDevice *device, struct vk_physical_device **out)
2336 {
2337 #ifndef _WIN32
2338 bool supported_device = false;
2339
2340 if (!(device->available_nodes & (1 << DRM_NODE_RENDER)) || device->bustype != DRM_BUS_PCI)
2341 return VK_ERROR_INCOMPATIBLE_DRIVER;
2342
2343 #ifdef HAVE_AMDGPU_VIRTIO
2344 supported_device |= device->deviceinfo.pci->vendor_id == VIRTGPU_PCI_VENDOR_ID;
2345 #endif
2346
2347 supported_device |= device->deviceinfo.pci->vendor_id == ATI_VENDOR_ID;
2348
2349 if (!supported_device)
2350 return VK_ERROR_INCOMPATIBLE_DRIVER;
2351
2352 return radv_physical_device_try_create((struct radv_instance *)vk_instance, device,
2353 (struct radv_physical_device **)out);
2354 #else
2355 return VK_SUCCESS;
2356 #endif
2357 }
2358
/* Tear down a physical device created by radv_physical_device_try_create().
 *
 * Resources are released in roughly the reverse order of creation (compare
 * with the fail_* labels of the creation path): WSI, perf counters, addrlib,
 * winsys, disk caches, file descriptors, then the base vk object and the
 * allocation itself.
 */
void
radv_physical_device_destroy(struct vk_physical_device *vk_device)
{
   struct radv_physical_device *pdev = container_of(vk_device, struct radv_physical_device, vk);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);

   radv_finish_wsi(pdev);
   ac_destroy_perfcounters(&pdev->ac_perfcounters);
   if (pdev->addrlib)
      ac_addrlib_destroy(pdev->addrlib);
   pdev->ws->destroy(pdev->ws);
   disk_cache_destroy(pdev->vk.disk_cache);
   disk_cache_destroy(pdev->disk_cache_meta);
   /* Either fd may be -1 (no corresponding DRM fd was ever opened). */
   if (pdev->local_fd != -1)
      close(pdev->local_fd);
   if (pdev->master_fd != -1)
      close(pdev->master_fd);
   vk_physical_device_finish(&pdev->vk);
   vk_free(&instance->vk.alloc, pdev);
}
2379
/* Enumerate the queue families exposed by RADV, in a fixed order: gfx, then
 * optionally compute, video decode, transfer, video encode and sparse
 * binding.
 *
 * Implements the usual Vulkan two-call idiom: when pQueueFamilyProperties is
 * NULL only the family count is written to *pCount; otherwise at most
 * *pCount entries are filled and *pCount is set to the number actually
 * written. The counting pass and the filling pass below must enumerate
 * families in exactly the same order.
 */
static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdev, uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   int num_queue_families = 1; /* the gfx family is always present */
   int idx;
   /* Counting pass. */
   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0)
         num_queue_families++;
   }

   if (radv_transfer_queue_enabled(pdev)) {
      num_queue_families++;
   }

   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0)
         num_queue_families++;
   }

   if (radv_sparse_queue_enabled(pdev)) {
      num_queue_families++;
   }

   /* Count-only query. */
   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   /* Filling pass: must mirror the counting pass above. */
   idx = 0;
   if (*pCount >= 1) {
      /* The gfx queue also does compute and transfer, and handles sparse
       * binding unless a dedicated sparse family is exposed. */
      VkQueueFlags gfx_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (!radv_sparse_queue_enabled(pdev))
         gfx_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = gfx_flags,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   if (pdev->info.ip[AMD_IP_COMPUTE].num_queues > 0 && !(instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      VkQueueFlags compute_flags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
      if (!radv_sparse_queue_enabled(pdev))
         compute_flags |= VK_QUEUE_SPARSE_BINDING_BIT;
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = compute_flags,
            .queueCount = pdev->info.ip[AMD_IP_COMPUTE].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }

   if (pdev->video_decode_enabled) {
      if (pdev->info.ip[pdev->vid_decode_ip].num_queues > 0) {
         if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
               .queueFlags = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
               .queueCount = pdev->info.ip[pdev->vid_decode_ip].num_queues,
               .timestampValidBits = 0,
               .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
            };
            idx++;
         }
      }
   }

   if (radv_transfer_queue_enabled(pdev)) {
      if (*pCount > idx) {
         /* SDMA has a coarser copy granularity than the other engines. */
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = VK_QUEUE_TRANSFER_BIT,
            .queueCount = pdev->info.ip[AMD_IP_SDMA].num_queues,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){16, 16, 8},
         };
         idx++;
      }
   }

   if (pdev->video_encode_enabled) {
      if (pdev->info.ip[AMD_IP_VCN_ENC].num_queues > 0) {
         if (*pCount > idx) {
            *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
               .queueFlags = VK_QUEUE_VIDEO_ENCODE_BIT_KHR,
               .queueCount = pdev->info.ip[AMD_IP_VCN_ENC].num_queues,
               .timestampValidBits = 0,
               .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
            };
            idx++;
         }
      }
   }

   if (radv_sparse_queue_enabled(pdev)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags = VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = 1,
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }

   /* Report how many entries were actually written. */
   *pCount = idx;
}
2499
/* Global queue priorities advertised through
 * VkQueueFamilyGlobalPriorityProperties. The array must fit within
 * VK_MAX_GLOBAL_PRIORITY_SIZE (static-asserted at the use site). */
static const VkQueueGlobalPriority radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME,
};
2506
2507 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)2508 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2509 VkQueueFamilyProperties2 *pQueueFamilyProperties)
2510 {
2511 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2512 if (!pQueueFamilyProperties) {
2513 radv_get_physical_device_queue_family_properties(pdev, pCount, NULL);
2514 return;
2515 }
2516 VkQueueFamilyProperties *properties[] = {
2517 &pQueueFamilyProperties[0].queueFamilyProperties, &pQueueFamilyProperties[1].queueFamilyProperties,
2518 &pQueueFamilyProperties[2].queueFamilyProperties, &pQueueFamilyProperties[3].queueFamilyProperties,
2519 &pQueueFamilyProperties[4].queueFamilyProperties, &pQueueFamilyProperties[5].queueFamilyProperties,
2520 };
2521 radv_get_physical_device_queue_family_properties(pdev, pCount, properties);
2522 assert(*pCount <= 6);
2523
2524 for (uint32_t i = 0; i < *pCount; i++) {
2525 vk_foreach_struct (ext, pQueueFamilyProperties[i].pNext) {
2526 switch (ext->sType) {
2527 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES: {
2528 VkQueueFamilyGlobalPriorityProperties *prop = (VkQueueFamilyGlobalPriorityProperties *)ext;
2529 STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE);
2530 prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2531 memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2532 break;
2533 }
2534 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_QUERY_RESULT_STATUS_PROPERTIES_KHR: {
2535 VkQueueFamilyQueryResultStatusPropertiesKHR *prop = (VkQueueFamilyQueryResultStatusPropertiesKHR *)ext;
2536 prop->queryResultStatusSupport = VK_FALSE;
2537 break;
2538 }
2539 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_VIDEO_PROPERTIES_KHR: {
2540 VkQueueFamilyVideoPropertiesKHR *prop = (VkQueueFamilyVideoPropertiesKHR *)ext;
2541 prop->videoCodecOperations = 0;
2542 if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
2543 if (VIDEO_CODEC_H264DEC)
2544 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H264_BIT_KHR;
2545 if (VIDEO_CODEC_H265DEC)
2546 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_H265_BIT_KHR;
2547 if (VIDEO_CODEC_AV1DEC && pdev->info.vcn_ip_version >= VCN_3_0_0 &&
2548 pdev->info.vcn_ip_version != VCN_3_0_33)
2549 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_DECODE_AV1_BIT_KHR;
2550 }
2551 if (pQueueFamilyProperties[i].queueFamilyProperties.queueFlags & VK_QUEUE_VIDEO_ENCODE_BIT_KHR) {
2552 if (VIDEO_CODEC_H264ENC)
2553 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H264_BIT_KHR;
2554 if (VIDEO_CODEC_H265ENC)
2555 prop->videoCodecOperations |= VK_VIDEO_CODEC_OPERATION_ENCODE_H265_BIT_KHR;
2556 }
2557 break;
2558 }
2559 default:
2560 break;
2561 }
2562 }
2563 }
2564 }
2565
/* Fill the VK_EXT_memory_budget heapBudget/heapUsage arrays for every heap
 * advertised by the device. Three cases are handled: APU with a single
 * unified heap, APU with split GTT + visible-VRAM heaps, and dGPU with one
 * heap per RADV_HEAP_* bit.
 */
static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
   const struct radv_instance *instance = radv_physical_device_instance(pdev);
   VkPhysicalDeviceMemoryProperties *memory_properties = &pdev->memory_properties;

   /* For all memory heaps, the computation of budget is as follow:
    * heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (eg.
    * in presence of shared buffers).
    */
   if (!pdev->info.has_dedicated_vram) {
      if (instance->drirc.enable_unified_heap_on_apu) {
         /* When the heaps are unified, only the visible VRAM heap is exposed on APUs. */
         assert(pdev->heaps == RADV_HEAP_VRAM_VIS);
         assert(pdev->memory_properties.memoryHeaps[0].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t vram_vis_heap_idx = 0;

         /* Get the total heap size which is the visible VRAM heap size. */
         uint64_t total_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         /* Get the different memory usages. */
         uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage =
            pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         memoryBudget->heapBudget[vram_vis_heap_idx] = total_free_space + total_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = total_internal_usage;
      } else {
         /* On APUs, the driver exposes fake heaps to the application because usually the carveout
          * is too small for games but the budgets need to be redistributed accordingly.
          */
         assert(pdev->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
         assert(pdev->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
         assert(pdev->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
         const uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;

         /* Get the visible VRAM/GTT heap sizes and internal usages. */
         uint64_t gtt_heap_size = pdev->memory_properties.memoryHeaps[gtt_heap_idx].size;
         uint64_t vram_vis_heap_size = pdev->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

         uint64_t vram_vis_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                            pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
         uint64_t gtt_internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);

         /* Compute the total heap size, internal and system usage. */
         uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
         uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
         uint64_t total_system_usage =
            pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE) + pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);

         uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

         /* Compute the total free space that can be allocated for this process across all heaps. */
         uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

         /* Compute the remaining visible VRAM size for this process. */
         uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);

         /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap
          * sizes, and align down to the page size to be conservative.
          */
         vram_vis_free_space =
            ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space), pdev->info.gart_page_size);
         uint64_t gtt_free_space = total_free_space - vram_vis_free_space;

         memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
         memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
         memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
         memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
      }
   } else {
      /* Dedicated VRAM: one budget entry per heap bit, in bit order, which
       * matches the order the heaps were reported in memory_properties. */
      unsigned mask = pdev->heaps;
      unsigned heap = 0;
      while (mask) {
         uint64_t internal_usage = 0, system_usage = 0;
         unsigned type = 1u << u_bit_scan(&mask);

         switch (type) {
         case RADV_HEAP_VRAM:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_USAGE);
            break;
         case RADV_HEAP_VRAM_VIS:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM_VIS);
            /* Without a separate VRAM heap, invisible allocations are
             * accounted against the visible heap too. */
            if (!(pdev->heaps & RADV_HEAP_VRAM))
               internal_usage += pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_VRAM);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_VRAM_VIS_USAGE);
            break;
         case RADV_HEAP_GTT:
            internal_usage = pdev->ws->query_value(pdev->ws, RADEON_ALLOCATED_GTT);
            system_usage = pdev->ws->query_value(pdev->ws, RADEON_GTT_USAGE);
            break;
         }

         uint64_t total_usage = MAX2(internal_usage, system_usage);

         uint64_t free_space = pdev->memory_properties.memoryHeaps[heap].size -
                               MIN2(pdev->memory_properties.memoryHeaps[heap].size, total_usage);
         memoryBudget->heapBudget[heap] = free_space + internal_usage;
         memoryBudget->heapUsage[heap] = internal_usage;
         ++heap;
      }

      assert(heap == memory_properties->memoryHeapCount);
   }

   /* The heapBudget value must be less than or equal to VkMemoryHeap::size for each heap. */
   for (uint32_t i = 0; i < memory_properties->memoryHeapCount; i++) {
      memoryBudget->heapBudget[i] = MIN2(memory_properties->memoryHeaps[i].size, memoryBudget->heapBudget[i]);
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
2700
2701 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,VkPhysicalDeviceMemoryProperties2 * pMemoryProperties)2702 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2703 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2704 {
2705 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2706
2707 pMemoryProperties->memoryProperties = pdev->memory_properties;
2708
2709 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2710 vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2711 if (memory_budget)
2712 radv_get_memory_budget_properties(physicalDevice, memory_budget);
2713 }
2714
2715 VKAPI_ATTR void VKAPI_CALL
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)2716 radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice, VkSampleCountFlagBits samples,
2717 VkMultisamplePropertiesEXT *pMultisampleProperties)
2718 {
2719 VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
2720
2721 if (samples & supported_samples) {
2722 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2723 } else {
2724 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
2725 }
2726 }
2727
2728 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice,uint32_t * pFragmentShadingRateCount,VkPhysicalDeviceFragmentShadingRateKHR * pFragmentShadingRates)2729 radv_GetPhysicalDeviceFragmentShadingRatesKHR(VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
2730 VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
2731 {
2732 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2733 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
2734 pFragmentShadingRateCount);
2735
2736 #define append_rate(w, h, s) \
2737 { \
2738 VkPhysicalDeviceFragmentShadingRateKHR rate = { \
2739 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
2740 .sampleCounts = s, \
2741 .fragmentSize = {.width = w, .height = h}, \
2742 }; \
2743 vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \
2744 }
2745
2746 for (uint32_t x = 2; x >= 1; x--) {
2747 for (uint32_t y = 2; y >= 1; y--) {
2748 VkSampleCountFlagBits samples;
2749
2750 if (x == 1 && y == 1) {
2751 samples = ~0;
2752 } else {
2753 samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
2754
2755 /* VRS coarse shading with 8x MSAA isn't supported on GFX12 and the
2756 * hw automatically clamps to 1x1.
2757 */
2758 if (pdev->info.gfx_level < GFX12)
2759 samples |= VK_SAMPLE_COUNT_8_BIT;
2760 }
2761
2762 append_rate(x, y, samples);
2763 }
2764 }
2765 #undef append_rate
2766
2767 return vk_outarray_status(&out);
2768 }
2769
2770 /* VK_EXT_tooling_info */
2771 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice,uint32_t * pToolCount,VkPhysicalDeviceToolProperties * pToolProperties)2772 radv_GetPhysicalDeviceToolProperties(VkPhysicalDevice physicalDevice, uint32_t *pToolCount,
2773 VkPhysicalDeviceToolProperties *pToolProperties)
2774 {
2775 VK_FROM_HANDLE(radv_physical_device, pdev, physicalDevice);
2776 const struct radv_instance *instance = radv_physical_device_instance(pdev);
2777 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceToolProperties, out, pToolProperties, pToolCount);
2778 bool rgp_enabled, rmv_enabled, rra_enabled;
2779 uint32_t tool_count = 0;
2780
2781 /* RGP */
2782 rgp_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RGP;
2783 if (rgp_enabled)
2784 tool_count++;
2785
2786 /* RMV */
2787 rmv_enabled = instance->vk.trace_mode & VK_TRACE_MODE_RMV;
2788 if (rmv_enabled)
2789 tool_count++;
2790
2791 /* RRA */
2792 rra_enabled = instance->vk.trace_mode & RADV_TRACE_MODE_RRA;
2793 if (rra_enabled)
2794 tool_count++;
2795
2796 if (!pToolProperties) {
2797 *pToolCount = tool_count;
2798 return VK_SUCCESS;
2799 }
2800
2801 if (rgp_enabled) {
2802 VkPhysicalDeviceToolProperties tool = {
2803 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2804 .name = "Radeon GPU Profiler",
2805 .version = "1.15",
2806 .description = "A ground-breaking low-level optimization tool that provides detailed "
2807 "information on Radeon GPUs.",
2808 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT |
2809 /* VK_EXT_debug_marker is only exposed if SQTT is enabled. */
2810 VK_TOOL_PURPOSE_ADDITIONAL_FEATURES_BIT | VK_TOOL_PURPOSE_DEBUG_MARKERS_BIT_EXT,
2811 };
2812 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2813 }
2814
2815 if (rmv_enabled) {
2816 VkPhysicalDeviceToolProperties tool = {
2817 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2818 .name = "Radeon Memory Visualizer",
2819 .version = "1.6",
2820 .description = "A tool to allow you to gain a deep understanding of how your application "
2821 "uses memory for graphics resources.",
2822 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2823 };
2824 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2825 }
2826
2827 if (rra_enabled) {
2828 VkPhysicalDeviceToolProperties tool = {
2829 .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TOOL_PROPERTIES,
2830 .name = "Radeon Raytracing Analyzer",
2831 .version = "1.2",
2832 .description = "A tool to investigate the performance of your ray tracing applications and "
2833 "highlight potential bottlenecks.",
2834 .purposes = VK_TOOL_PURPOSE_PROFILING_BIT | VK_TOOL_PURPOSE_TRACING_BIT,
2835 };
2836 vk_outarray_append_typed(VkPhysicalDeviceToolProperties, &out, t) *t = tool;
2837 }
2838
2839 return vk_outarray_status(&out);
2840 }
2841
2842 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice,uint32_t * pPropertyCount,VkCooperativeMatrixPropertiesKHR * pProperties)2843 radv_GetPhysicalDeviceCooperativeMatrixPropertiesKHR(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
2844 VkCooperativeMatrixPropertiesKHR *pProperties)
2845 {
2846 VK_OUTARRAY_MAKE_TYPED(VkCooperativeMatrixPropertiesKHR, out, pProperties, pPropertyCount);
2847
2848 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2849 {
2850 *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2851 .MSize = 16,
2852 .NSize = 16,
2853 .KSize = 16,
2854 .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2855 .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2856 .CType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2857 .ResultType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2858 .saturatingAccumulation = false,
2859 .scope = VK_SCOPE_SUBGROUP_KHR};
2860 }
2861
2862 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2863 {
2864 *p = (struct VkCooperativeMatrixPropertiesKHR){.sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2865 .MSize = 16,
2866 .NSize = 16,
2867 .KSize = 16,
2868 .AType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2869 .BType = VK_COMPONENT_TYPE_FLOAT16_KHR,
2870 .CType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2871 .ResultType = VK_COMPONENT_TYPE_FLOAT32_KHR,
2872 .saturatingAccumulation = false,
2873 .scope = VK_SCOPE_SUBGROUP_KHR};
2874 }
2875
2876 for (unsigned asigned = 0; asigned < 2; asigned++) {
2877 for (unsigned bsigned = 0; bsigned < 2; bsigned++) {
2878 for (unsigned csigned = 0; csigned < 2; csigned++) {
2879 for (unsigned saturate = 0; saturate < 2; saturate++) {
2880 if (!csigned && saturate)
2881 continue; /* The HW only supports signed acc. */
2882 vk_outarray_append_typed(VkCooperativeMatrixPropertiesKHR, &out, p)
2883 {
2884 *p = (struct VkCooperativeMatrixPropertiesKHR){
2885 .sType = VK_STRUCTURE_TYPE_COOPERATIVE_MATRIX_PROPERTIES_KHR,
2886 .MSize = 16,
2887 .NSize = 16,
2888 .KSize = 16,
2889 .AType = asigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2890 .BType = bsigned ? VK_COMPONENT_TYPE_SINT8_KHR : VK_COMPONENT_TYPE_UINT8_KHR,
2891 .CType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2892 .ResultType = csigned ? VK_COMPONENT_TYPE_SINT32_KHR : VK_COMPONENT_TYPE_UINT32_KHR,
2893 .saturatingAccumulation = saturate,
2894 .scope = VK_SCOPE_SUBGROUP_KHR};
2895 }
2896 }
2897 }
2898 }
2899 }
2900
2901 return vk_outarray_status(&out);
2902 }
2903