/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "dirent.h"

#include <stdatomic.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "radv_cs.h"
#include "util/disk_cache.h"
#include "vk_util.h"
#include <xf86drm.h>
#include <amdgpu.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "ac_llvm_util.h"
#include "vk_format.h"
#include "sid.h"
#include "git_sha1.h"
#include "util/build_id.h"
#include "util/debug.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "compiler/glsl_types.h"
#include "util/driconf.h"

/* The number of IBs per submit isn't infinite; it depends on the ring type
 * (i.e. some initial setup is needed for a submit) and each IB costs 4 DW.
 * This limit is arbitrary but should be safe for now. Ideally, we should get
 * this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
                                         struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
                               struct radv_timeline *timeline,
                               uint64_t p);

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list);

static void
radv_destroy_semaphore_part(struct radv_device *device,
                            struct radv_semaphore_part *part);

static VkResult
radv_create_pthread_cond(pthread_cond_t *cond);

uint64_t radv_get_current_time(void)
{
   struct timespec tv;
   clock_gettime(CLOCK_MONOTONIC, &tv);
   return tv.tv_nsec + tv.tv_sec * 1000000000ull;
}

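/* Convert a relative timeout into an absolute deadline, saturating at
 * UINT64_MAX instead of wrapping around when the caller passes an
 * effectively infinite timeout.
 */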
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = radv_get_current_time();

   timeout = MIN2(UINT64_MAX - current_time, timeout);

   return current_time + timeout;
}

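/* The cache UUID hashes the build IDs of the driver and of LLVM, plus the
 * GPU family and pointer size, so on-disk shader caches are invalidated
 * whenever either component changes.
 */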
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
       !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   int ov = driQueryOptioni(&device->instance->dri_options,
                            "override_vram_size");
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   return radv_get_adjusted_vram_size(device) - device->rad_info.vram_vis_size;
}

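/* Bitmask describing which heaps a physical device actually exposes. */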
enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;
   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap) {
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (device->rad_info.gart_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap) {
         .size = device->rad_info.gart_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap) {
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

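   /* Now build the memory type list. Each type pairs a heap index with the
    * RADEON_DOMAIN and RADEON_FLAG combination the winsys needs when
    * allocating from that type.
    */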
   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

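   /* On GPUs that can bypass the L2 cache, expose additional uncached
    * variants of the host-visible (and the plain device-local) types for
    * VK_AMD_device_coherent_memory.
    */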
   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags & (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                                        VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
               VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
               VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType) {
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives 2-5% performance with SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options,
                          "radv_report_llvm9_version_string")) {
         return "ACO/LLVM 9.0.1";
      }

      return "ACO";
   }

   return "LLVM " MESA_LLVM_VERSION_STRING;
}

static VkResult
radv_physical_device_try_create(struct radv_instance *instance,
                                drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not open device '%s'", path);

         return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not get the kernel driver version for device '%s'", path);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                          "failed to get version %s: %m", path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

         return VK_ERROR_INCOMPATIBLE_DRIVER;
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         radv_logi("Found compatible device '%s'.", path);
   }

   struct radv_physical_device *device =
      vk_zalloc2(&instance->alloc, NULL, sizeof(*device), 8,
                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   device->instance = instance;

   if (drm_device) {
      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags,
                                             instance->perftest_flags);
   } else {
      device->ws = radv_null_winsys_create();
   }

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "failed to initialize winsys");
      goto fail_alloc;
   }

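   /* For VK_KHR_display, also open the primary node, but keep it only if
    * the kernel reports that acceleration actually works on it.
    */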
   if (drm_device && instance->enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {
            .return_pointer = (uintptr_t)&accel_working,
            .return_size = sizeof(accel_working),
            .query = AMDGPU_INFO_ACCEL_WORKING
         };

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request,
                             sizeof(struct drm_amdgpu_info)) < 0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;

   snprintf(device->name, sizeof(device->name),
            "AMD RADV %s (%s)",
            device->rad_info.name, radv_get_compiler_string(device));

   if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "cannot generate UUID");
      goto fail_wsi;
   }

   /* These flags affect shader compilation. */
   uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);

   /* The GPU ID is already embedded in the UUID so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);

   if (device->rad_info.chip_class < GFX8 ||
       device->rad_info.chip_class > GFX10)
      fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed = device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed =
      (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = device->rad_info.chip_class >= GFX10 &&
      device->rad_info.family != CHIP_NAVI14 &&
      device->rad_info.has_dedicated_vram &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;

   if (device->rad_info.chip_class >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device,
                                                 &device->supported_extensions);

   if (drm_device)
      device->bus_info = *drm_device->businfo.pci;

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_disk_cache;
   }

   *device_out = device;

   return VK_SUCCESS;

fail_disk_cache:
   disk_cache_destroy(device->disk_cache);
fail_wsi:
   device->ws->destroy(device->ws);
fail_alloc:
   vk_free(&instance->alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_free(&device->instance->alloc, device);
}

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
   return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
   return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
   free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
   .pUserData = NULL,
   .pfnAllocation = default_alloc_func,
   .pfnReallocation = default_realloc_func,
   .pfnFree = default_free_func,
};

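/* RADV_DEBUG is a comma-separated list of the option names below,
 * e.g. RADV_DEBUG=info,shaders.
 */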
static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"errors", RADV_DEBUG_ERRORS},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nothreadllvm", RADV_DEBUG_NOTHREADLLVM},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"allentrypoints", RADV_DEBUG_ALL_ENTRYPOINTS},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {NULL, 0}
};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

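/* RADV_PERFTEST uses the same comma-separated syntax as RADV_DEBUG. */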
static const struct debug_control radv_perftest_options[] = {
   {"localbos", RADV_PERFTEST_LOCAL_BOS},
   {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
   {"bolist", RADV_PERFTEST_BO_LIST},
   {"tccompatcmask", RADV_PERFTEST_TC_COMPAT_CMASK},
   {"cswave32", RADV_PERFTEST_CS_WAVE_32},
   {"pswave32", RADV_PERFTEST_PS_WAVE_32},
   {"gewave32", RADV_PERFTEST_GE_WAVE_32},
   {"dfsm", RADV_PERFTEST_DFSM},
   {NULL, 0}
};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

static void
radv_handle_per_app_options(struct radv_instance *instance,
                            const VkApplicationInfo *info)
{
   const char *name = info ? info->pApplicationName : NULL;
   const char *engine_name = info ? info->pEngineName : NULL;

   if (name) {
      if (!strcmp(name, "DOOM_VFR")) {
         /* Work around a Doom VFR game bug */
         instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
      } else if (!strcmp(name, "Fledge")) {
         /*
          * Zero VRAM for "The Surge 2"
          *
          * This avoids a hang when rendering any level. Likely
          * caused by uninitialized data in an indirect draw.
          */
         instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
      } else if (!strcmp(name, "No Man's Sky")) {
         /* Work around an NMS game bug */
         instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;
      } else if (!strcmp(name, "DOOMEternal")) {
         /* Zero VRAM for Doom Eternal to fix rendering issues. */
         instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
      } else if (!strcmp(name, "ShadowOfTheTomb")) {
         /* Work around flickering foliage for native Shadow of the Tomb Raider
          * on GFX10.3 */
         instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;
      }
   }

   if (engine_name) {
      if (!strcmp(engine_name, "vkd3d")) {
         /* Zero VRAM for all VKD3D (DX12->VK) games to fix
          * rendering issues.
          */
         instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;
      } else if (!strcmp(engine_name, "Quantic Dream Engine")) {
         /* Fix various artifacts in Detroit: Become Human */
         instance->debug_flags |= RADV_DEBUG_ZERO_VRAM |
                                  RADV_DEBUG_DISCARD_TO_DEMOTE;

         /* Fix rendering issues in Detroit: Become Human
          * because the game uses render loops (it
          * samples/renders from/to the same depth/stencil
          * texture inside the same draw) without input
          * attachments, and that is invalid Vulkan usage.
          */
         instance->disable_tc_compat_htile_in_general = true;
      }
   }

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options,
                      "radv_enable_mrt_output_nan_fixup");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;
}

static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
   DRI_CONF_SECTION_END
};

static void radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options,
                       0, "radv", NULL,
                       instance->applicationName,
                       instance->applicationVersion,
                       instance->engineName,
                       instance->engineVersion);
}

VkResult radv_CreateInstance(
   const VkInstanceCreateInfo*                 pCreateInfo,
   const VkAllocationCallbacks*                pAllocator,
   VkInstance*                                 pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                         VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(NULL, &instance->base, VK_OBJECT_TYPE_INSTANCE);

   if (pAllocator)
      instance->alloc = *pAllocator;
   else
      instance->alloc = default_alloc;

   if (pCreateInfo->pApplicationInfo) {
      const VkApplicationInfo *app = pCreateInfo->pApplicationInfo;

      instance->applicationName =
         vk_strdup(&instance->alloc, app->pApplicationName,
                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      instance->applicationVersion = app->applicationVersion;

      instance->engineName =
         vk_strdup(&instance->alloc, app->pEngineName,
                   VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      instance->engineVersion = app->engineVersion;
      instance->apiVersion = app->apiVersion;
   }

   if (instance->apiVersion == 0)
      instance->apiVersion = VK_API_VERSION_1_0;

   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"),
                                              radv_debug_options);

   const char *radv_perftest_str = getenv("RADV_PERFTEST");
   instance->perftest_flags = parse_debug_string(radv_perftest_str,
                                                 radv_perftest_options);

   if (radv_perftest_str) {
      /* Output warnings for well-known RADV_PERFTEST options that no
       * longer exist or are deprecated.
       */
      if (strstr(radv_perftest_str, "aco")) {
         fprintf(stderr, "*******************************************************************************\n");
         fprintf(stderr, "* WARNING: Unknown option RADV_PERFTEST='aco'. ACO is enabled by default now. *\n");
         fprintf(stderr, "*******************************************************************************\n");
      }
      if (strstr(radv_perftest_str, "llvm")) {
         fprintf(stderr, "*********************************************************************************\n");
         fprintf(stderr, "* WARNING: Unknown option 'RADV_PERFTEST=llvm'. Did you mean 'RADV_DEBUG=llvm'? *\n");
         fprintf(stderr, "*********************************************************************************\n");
         abort();
      }
   }

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Created an instance");

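   /* Validate the requested extensions: fail instance creation if any of
    * them is unknown or unsupported by this build.
    */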
   for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
      int idx;
      for (idx = 0; idx < RADV_INSTANCE_EXTENSION_COUNT; idx++) {
         if (!strcmp(pCreateInfo->ppEnabledExtensionNames[i],
                     radv_instance_extensions[idx].extensionName))
            break;
      }

      if (idx >= RADV_INSTANCE_EXTENSION_COUNT ||
          !radv_instance_extensions_supported.extensions[idx]) {
         vk_object_base_finish(&instance->base);
         vk_free2(&default_alloc, pAllocator, instance);
         return vk_error(instance, VK_ERROR_EXTENSION_NOT_PRESENT);
      }

      instance->enabled_extensions.extensions[idx] = true;
   }

   bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;

   for (unsigned i = 0; i < ARRAY_SIZE(instance->dispatch.entrypoints); i++) {
      /* Vulkan requires that entrypoints for extensions which have
       * not been enabled must not be advertised.
       */
      if (!unchecked &&
          !radv_instance_entrypoint_is_enabled(i, instance->apiVersion,
                                               &instance->enabled_extensions)) {
         instance->dispatch.entrypoints[i] = NULL;
      } else {
         instance->dispatch.entrypoints[i] =
            radv_instance_dispatch_table.entrypoints[i];
      }
   }

   for (unsigned i = 0; i < ARRAY_SIZE(instance->physical_device_dispatch.entrypoints); i++) {
      /* Vulkan requires that entrypoints for extensions which have
       * not been enabled must not be advertised.
       */
      if (!unchecked &&
          !radv_physical_device_entrypoint_is_enabled(i, instance->apiVersion,
                                                      &instance->enabled_extensions)) {
         instance->physical_device_dispatch.entrypoints[i] = NULL;
      } else {
         instance->physical_device_dispatch.entrypoints[i] =
            radv_physical_device_dispatch_table.entrypoints[i];
      }
   }

   for (unsigned i = 0; i < ARRAY_SIZE(instance->device_dispatch.entrypoints); i++) {
      /* Vulkan requires that entrypoints for extensions which have
       * not been enabled must not be advertised.
       */
      if (!unchecked &&
          !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
                                             &instance->enabled_extensions, NULL)) {
         instance->device_dispatch.entrypoints[i] = NULL;
      } else {
         instance->device_dispatch.entrypoints[i] =
            radv_device_dispatch_table.entrypoints[i];
      }
   }

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   result = vk_debug_report_instance_init(&instance->debug_report_callbacks);
   if (result != VK_SUCCESS) {
      vk_object_base_finish(&instance->base);
      vk_free2(&default_alloc, pAllocator, instance);
      return vk_error(instance, result);
   }

   glsl_type_singleton_init_or_ref();

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);
   radv_handle_per_app_options(instance, pCreateInfo->pApplicationInfo);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void radv_DestroyInstance(
   VkInstance                                  _instance,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice,
                            &instance->physical_devices, link) {
      radv_physical_device_destroy(pdevice);
   }

   vk_free(&instance->alloc, instance->engineName);
   vk_free(&instance->alloc, instance->applicationName);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   glsl_type_singleton_decref();

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_debug_report_instance_destroy(&instance->debug_report_callbacks);

   vk_object_base_finish(&instance->base);
   vk_free(&instance->alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   VkResult result = VK_SUCCESS;
   int max_devices;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that allows testing the compiler without having an
       * AMDGPU device.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

   max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Found %d drm nodes", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i],
                                                  &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);

   /* If we successfully enumerated any devices, call it success. */
   return result;
}

VkResult radv_EnumeratePhysicalDevices(
   VkInstance                                  _instance,
   uint32_t*                                   pPhysicalDeviceCount,
   VkPhysicalDevice*                           pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices,
                          pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice,
                       &instance->physical_devices, link) {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i) {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VkResult radv_EnumeratePhysicalDeviceGroups(
   VkInstance                                  _instance,
   uint32_t*                                   pPhysicalDeviceGroupCount,
   VkPhysicalDeviceGroupProperties*            pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
                          pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice,
                       &instance->physical_devices, link) {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p) {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

void radv_GetPhysicalDeviceFeatures(
   VkPhysicalDevice                            physicalDevice,
   VkPhysicalDeviceFeatures*                   pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice),
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit &&
                             (LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm);
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
   f->shaderSharedInt64Atomics = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = pdevice->rad_info.has_syncobj_wait_for_submit;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = false;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

void radv_GetPhysicalDeviceFeatures2(
   VkPhysicalDevice                            physicalDevice,
   VkPhysicalDeviceFeatures2                  *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

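   /* CORE_FEATURE copies the value computed for the corresponding core
    * feature struct above, so the per-extension structs always match what
    * the Vulkan 1.1/1.2 feature queries report.
    */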
#define CORE_FEATURE(major, minor, feature) \
   features->feature = core_##major##_##minor.feature

   vk_foreach_struct(ext, pFeatures->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
         VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
         CORE_FEATURE(1, 1, variablePointersStorageBuffer);
         CORE_FEATURE(1, 1, variablePointers);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
         VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures *)ext;
         CORE_FEATURE(1, 1, multiview);
         CORE_FEATURE(1, 1, multiviewGeometryShader);
         CORE_FEATURE(1, 1, multiviewTessellationShader);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
         VkPhysicalDeviceShaderDrawParametersFeatures *features =
            (VkPhysicalDeviceShaderDrawParametersFeatures *)ext;
         CORE_FEATURE(1, 1, shaderDrawParameters);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
         VkPhysicalDeviceProtectedMemoryFeatures *features =
            (VkPhysicalDeviceProtectedMemoryFeatures *)ext;
         CORE_FEATURE(1, 1, protectedMemory);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
         VkPhysicalDevice16BitStorageFeatures *features =
            (VkPhysicalDevice16BitStorageFeatures *)ext;
         CORE_FEATURE(1, 1, storageBuffer16BitAccess);
         CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
         CORE_FEATURE(1, 1, storagePushConstant16);
         CORE_FEATURE(1, 1, storageInputOutput16);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
         VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
            (VkPhysicalDeviceSamplerYcbcrConversionFeatures *)ext;
         CORE_FEATURE(1, 1, samplerYcbcrConversion);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
         VkPhysicalDeviceDescriptorIndexingFeatures *features =
            (VkPhysicalDeviceDescriptorIndexingFeatures *)ext;
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
         CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
         CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
         CORE_FEATURE(1, 2, runtimeDescriptorArray);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = !pdevice->use_ngg_streamout;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         features->bufferDeviceAddress = true;
         features->bufferDeviceAddressCaptureReplay = false;
         features->bufferDeviceAddressMultiDevice = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
         VkPhysicalDeviceBufferDeviceAddressFeatures *features =
            (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
         VkPhysicalDeviceHostQueryResetFeatures *features =
            (VkPhysicalDeviceHostQueryResetFeatures *)ext;
         CORE_FEATURE(1, 2, hostQueryReset);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
         VkPhysicalDevice8BitStorageFeatures *features =
            (VkPhysicalDevice8BitStorageFeatures *)ext;
         CORE_FEATURE(1, 2, storageBuffer8BitAccess);
         CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
         CORE_FEATURE(1, 2, storagePushConstant8);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
         VkPhysicalDeviceShaderFloat16Int8Features *features =
            (VkPhysicalDeviceShaderFloat16Int8Features *)ext;
         CORE_FEATURE(1, 2, shaderFloat16);
         CORE_FEATURE(1, 2, shaderInt8);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
         VkPhysicalDeviceShaderAtomicInt64Features *features =
            (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
         CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
         CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
            (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
         features->shaderDemoteToHelperInvocation = LLVM_VERSION_MAJOR >= 9 || !pdevice->use_llvm;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
         VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
            (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, uniformBufferStandardLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
         VkPhysicalDeviceImagelessFramebufferFeatures *features =
            (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
         CORE_FEATURE(1, 2, imagelessFramebuffer);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
         VkPhysicalDeviceTimelineSemaphoreFeatures *features =
            (VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;
         CORE_FEATURE(1, 2, timelineSemaphore);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
            (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
         features->subgroupSizeControl = true;
         features->computeFullSubgroups = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
         VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
            (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
         features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
         VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
            (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
         CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
         VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
            (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, separateDepthStencilLayouts);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
         radv_get_physical_device_features_1_1(pdevice, (void *)ext);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
         radv_get_physical_device_features_1_2(pdevice, (void *)ext);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = false;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = true;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
         VkDeviceMemoryOverallocationCreateInfoAMD *features =
            (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
         features->overallocationBehavior = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features =
            (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
         VkPhysicalDevicePrivateDataFeaturesEXT *features =
            (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
         features->privateData = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
         VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
            (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
         features->pipelineCreationCacheControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
         VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
            (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, vulkanMemoryModel);
         CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
         CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
         VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
            (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
         features->robustImageAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
         features->shaderBufferFloat32Atomics = true;
         features->shaderBufferFloat32AtomicAdd = false;
         features->shaderBufferFloat64Atomics = true;
         features->shaderBufferFloat64AtomicAdd = false;
         features->shaderSharedFloat32Atomics = true;
         features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8 &&
                                                  (!pdevice->use_llvm || LLVM_VERSION_MAJOR >= 10);
         features->shaderSharedFloat64Atomics = true;
         features->shaderSharedFloat64AtomicAdd = false;
         features->shaderImageFloat32Atomics = true;
         features->shaderImageFloat32AtomicAdd = false;
         features->sparseImageFloat32Atomics = false;
         features->sparseImageFloat32AtomicAdd = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
         VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
            (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
         features->shaderTerminateInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
         VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
            (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
         features->shaderImageInt64Atomics = LLVM_VERSION_MAJOR >= 11 || !pdevice->use_llvm;
         features->sparseImageInt64Atomics = false;
         break;
      }
      default:
         break;
      }
   }
#undef CORE_FEATURE
}

static size_t
radv_max_descriptor_set_size()
{
   /* Make sure that the entire descriptor set is addressable with a signed
    * 32-bit int, i.e. the sum of all limits scaled by descriptor size has
    * to be at most 2 GiB. A combined image & sampler object counts against
    * both the sampler and the sampled image limit. This limit is for the
    * pipeline layout, not for the set layout, but there is no set limit,
    * so we just set a pipeline limit. I don't think any app is going to
    * hit this soon. */
   return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS
          - MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
           32 /* sampler, largest when combined with image */ +
           64 /* sampled image */ +
           64 /* storage image */);
}
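
/* A worked example of the bound above (illustrative only; it ignores the
 * small amount subtracted for dynamic buffers and inline uniform blocks):
 * with a worst-case per-binding footprint of 32 + 32 + 32 + 64 + 64 = 224
 * bytes, the limit comes out to roughly
 *
 *    (1ull << 31) / 224 = 2147483648 / 224 ~= 9.58 million descriptors
 *
 * per pipeline layout, which is why no realistic application is expected
 * to hit it.
 */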

static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
   uint32_t uniform_offset_alignment = driQueryOptioni(&pdevice->instance->dri_options,
                                                       "radv_override_uniform_offset_alignment");
   if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
      fprintf(stderr, "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
                      "not a power of two\n", uniform_offset_alignment);
      uniform_offset_alignment = 0;
   }

   /* Take at least the hardware limit. */
   return MAX2(uniform_offset_alignment, 4);
}
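
/* The override above is read from driconf. A minimal sketch of a drirc
 * entry that forces a 256-byte minUniformBufferOffsetAlignment for one
 * executable (the application/executable names are invented for the
 * example):
 *
 *    <driconf>
 *      <device driver="radv">
 *        <application name="Some App" executable="someapp">
 *          <option name="radv_override_uniform_offset_alignment" value="256" />
 *        </application>
 *      </device>
 *    </driconf>
 *
 * Values that are not a power of two are rejected above and fall back to
 * the hardware minimum of 4.
 */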

void radv_GetPhysicalDeviceProperties(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceProperties *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSampleCountFlags sample_counts = 0xf;

   size_t max_descriptor_set_size = radv_max_descriptor_set_size();

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = UINT32_MAX,
      .maxUniformBufferRange = UINT32_MAX,
      .maxStorageBufferRange = UINT32_MAX,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = max_descriptor_set_size,
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = 2047,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 127,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = 8,
      .maxComputeSharedMemorySize = 32768,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 1024,
      .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 4,
      .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
      .minStorageBufferOffsetAlignment = 4,
      .minTexelOffset = -32,
      .maxTexelOffset = 31,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -2,
      .maxInterpolationOffset = 2,
      .subPixelInterpolationOffsetBits = 8,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = sample_counts,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 0.0, 8191.875 },
      .lineWidthRange = { 0.0, 8191.875 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 8.0),
      .strictLines = false, /* FINISHME */
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
      .apiVersion = radv_physical_device_api_version(pdevice),
      .driverVersion = vk_get_driver_version(),
      .vendorID = ATI_VENDOR_ID,
      .deviceID = pdevice->rad_info.pci_id,
      .deviceType = pdevice->rad_info.has_dedicated_vram ?
                    VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU :
                    VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0},
   };

   strcpy(pProperties->deviceName, pdevice->name);
   memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}
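
/* A minimal sketch of how an application consumes this entry point through
 * the core Vulkan API (standard usage, nothing RADV-specific):
 *
 *    VkPhysicalDeviceProperties props;
 *    vkGetPhysicalDeviceProperties(physical_device, &props);
 *    printf("%s: Vulkan %u.%u.%u, max 2D image size %u\n",
 *           props.deviceName,
 *           VK_VERSION_MAJOR(props.apiVersion),
 *           VK_VERSION_MINOR(props.apiVersion),
 *           VK_VERSION_PATCH(props.apiVersion),
 *           props.limits.maxImageDimension2D);
 */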

static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   /* The LUID is for Windows. */
   p->deviceLUIDValid = false;
   p->deviceNodeMask = 0;

   p->subgroupSize = RADV_SUBGROUP_SIZE;
   p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS |
                                VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_QUAD_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
   p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}

static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_RADV;
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 " (%s)",
            radv_get_compiler_string(pdevice));
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 3,
      .patch = 0,
   };

   /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
    * controlled by the same config register.
    */
   if (pdevice->rad_info.has_packed_math_16bit) {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
   } else {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
   }

   /* With LLVM, do not allow both preserving and flushing denorms because
    * different shaders in the same pipeline can have different settings and
    * this won't work for merged shaders. To make it work, this requires LLVM
    * support for changing the register. The same logic applies for the
    * rounding modes because they are configured with the same config
    * register.
    */
   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;

   p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;

   p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = false;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = false;
   p->quadDivergentImplicitLod = false;

   size_t max_descriptor_set_size = ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
      MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
      (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
       32 /* storage buffer, 32 due to potential space wasted on alignment */ +
       32 /* sampler, largest when combined with image */ +
       64 /* sampled image */ +
       64 /* storage image */);
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                   VK_RESOLVE_MODE_AVERAGE_BIT_KHR |
                                   VK_RESOLVE_MODE_MIN_BIT_KHR |
                                   VK_RESOLVE_MODE_MAX_BIT_KHR;

   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                     VK_RESOLVE_MODE_MIN_BIT_KHR |
                                     VK_RESOLVE_MODE_MAX_BIT_KHR;

   p->independentResolveNone = true;
   p->independentResolve = true;

   /* GFX6-8 only support single channel min/max filter. */
   p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
   p->filterMinmaxSingleComponentFormats = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
}

void radv_GetPhysicalDeviceProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceProperties2 *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
   memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
          sizeof(core_##major##_##minor.core_property))

#define CORE_PROPERTY(major, minor, property) \
   CORE_RENAMED_PROPERTY(major, minor, property, property)
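
/* For reference, CORE_PROPERTY(1, 1, subgroupSize) expands (per the macros
 * above) to:
 *
 *    memcpy(&properties->subgroupSize, &core_1_1.subgroupSize,
 *           sizeof(core_1_1.subgroupSize));
 *
 * i.e. each extension struct field is filled by copying the matching field
 * out of the already-populated core 1.1/1.2 property structs.
 */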

   vk_foreach_struct(ext, pProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
         VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;
         CORE_PROPERTY(1, 1, deviceUUID);
         CORE_PROPERTY(1, 1, driverUUID);
         CORE_PROPERTY(1, 1, deviceLUID);
         CORE_PROPERTY(1, 1, deviceLUIDValid);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
         VkPhysicalDeviceMultiviewProperties *properties = (VkPhysicalDeviceMultiviewProperties *)ext;
         CORE_PROPERTY(1, 1, maxMultiviewViewCount);
         CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
         VkPhysicalDevicePointClippingProperties *properties =
            (VkPhysicalDevicePointClippingProperties *)ext;
         CORE_PROPERTY(1, 1, pointClippingBehavior);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
            (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
         properties->minImportedHostPointerAlignment = 4096;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
         VkPhysicalDeviceSubgroupProperties *properties =
            (VkPhysicalDeviceSubgroupProperties *)ext;
         CORE_PROPERTY(1, 1, subgroupSize);
         CORE_RENAMED_PROPERTY(1, 1, supportedStages,
                               subgroupSupportedStages);
         CORE_RENAMED_PROPERTY(1, 1, supportedOperations,
                               subgroupSupportedOperations);
         CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages,
                               subgroupQuadOperationsInAllStages);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
         VkPhysicalDeviceMaintenance3Properties *properties =
            (VkPhysicalDeviceMaintenance3Properties *)ext;
         CORE_PROPERTY(1, 1, maxPerSetDescriptors);
         CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
         VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
            (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
         CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
         CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
         VkPhysicalDeviceShaderCorePropertiesAMD *properties =
            (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

         /* Shader engines. */
         properties->shaderEngineCount = pdevice->rad_info.max_se;
         properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sh_per_se;
         properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
         properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
         properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
         properties->wavefrontSize = 64;

         /* SGPR. */
         properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
         properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
         properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
         properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;

         /* VGPR. */
         properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
         properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
         properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
         properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
         VkPhysicalDeviceShaderCoreProperties2AMD *properties =
            (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

         properties->shaderCoreFeatures = 0;
         properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         properties->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
         VkPhysicalDeviceDescriptorIndexingProperties *properties =
            (VkPhysicalDeviceDescriptorIndexingProperties *)ext;
         CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
         CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
         CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
         CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
         VkPhysicalDeviceProtectedMemoryProperties *properties =
            (VkPhysicalDeviceProtectedMemoryProperties *)ext;
         CORE_PROPERTY(1, 1, protectedNoFault);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         properties->primitiveOverestimationSize = 0;
         properties->maxExtraPrimitiveOverestimationSize = 0;
         properties->extraPrimitiveOverestimationSizeGranularity = 0;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = false;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->bus_info.domain;
         properties->pciBus = pdevice->bus_info.bus;
         properties->pciDevice = pdevice->bus_info.dev;
         properties->pciFunction = pdevice->bus_info.func;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
         VkPhysicalDeviceDriverProperties *properties =
            (VkPhysicalDeviceDriverProperties *)ext;
         CORE_PROPERTY(1, 2, driverID);
         CORE_PROPERTY(1, 2, driverName);
         CORE_PROPERTY(1, 2, driverInfo);
         CORE_PROPERTY(1, 2, conformanceVersion);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
         properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = UINT32_MAX;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
         properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
         properties->transformFeedbackRasterizationStreamSelect = false;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
         props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT |
                                                  VK_SAMPLE_COUNT_4_BIT |
                                                  VK_SAMPLE_COUNT_8_BIT;
         properties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
         VkPhysicalDeviceDepthStencilResolveProperties *properties =
            (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
         CORE_PROPERTY(1, 2, supportedDepthResolveModes);
         CORE_PROPERTY(1, 2, supportedStencilResolveModes);
         CORE_PROPERTY(1, 2, independentResolveNone);
         CORE_PROPERTY(1, 2, independentResolve);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
            (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
         properties->storageTexelBufferOffsetAlignmentBytes = 4;
         properties->storageTexelBufferOffsetSingleTexelAlignment = true;
         properties->uniformTexelBufferOffsetAlignmentBytes = 4;
         properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {
         VkPhysicalDeviceFloatControlsProperties *properties =
            (VkPhysicalDeviceFloatControlsProperties *)ext;
         CORE_PROPERTY(1, 2, denormBehaviorIndependence);
         CORE_PROPERTY(1, 2, roundingModeIndependence);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
         VkPhysicalDeviceTimelineSemaphoreProperties *properties =
            (VkPhysicalDeviceTimelineSemaphoreProperties *)ext;
         CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
            (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
         props->minSubgroupSize = 64;
         props->maxSubgroupSize = 64;
         props->maxComputeWorkgroupSubgroups = UINT32_MAX;
         props->requiredSubgroupSizeStages = 0;

         if (pdevice->rad_info.chip_class >= GFX10) {
            /* Only GFX10+ supports wave32. */
            props->minSubgroupSize = 32;
            props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
         }
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
         radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
         break;
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
         radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
         break;
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties =
            (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
         properties->robustStorageBufferAccessSizeAlignment = 4;
         properties->robustUniformBufferAccessSizeAlignment = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
         break;
      }
      default:
         break;
      }
   }
}
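
/* A minimal sketch of the pNext-chaining pattern this entry point serves
 * (standard Vulkan usage): query the driver identity by chaining
 * VkPhysicalDeviceDriverProperties into the call.
 *
 *    VkPhysicalDeviceDriverProperties driver_props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES,
 *    };
 *    VkPhysicalDeviceProperties2 props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2,
 *       .pNext = &driver_props,
 *    };
 *    vkGetPhysicalDeviceProperties2(physical_device, &props2);
 *    // driver_props.driverName is "radv" on this driver.
 */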

static void radv_get_physical_device_queue_family_properties(
   struct radv_physical_device *pdevice,
   uint32_t *pCount,
   VkQueueFamilyProperties **pQueueFamilyProperties)
{
   int num_queue_families = 1;
   int idx;
   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   if (*pCount >= 1) {
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
         .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                       VK_QUEUE_COMPUTE_BIT |
                       VK_QUEUE_TRANSFER_BIT |
                       VK_QUEUE_SPARSE_BINDING_BIT,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
      };
      idx++;
   }

   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) {
            .queueFlags = VK_QUEUE_COMPUTE_BIT |
                          VK_QUEUE_TRANSFER_BIT |
                          VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
         };
         idx++;
      }
   }
   *pCount = idx;
}

void radv_GetPhysicalDeviceQueueFamilyProperties(
   VkPhysicalDevice physicalDevice,
   uint32_t *pCount,
   VkQueueFamilyProperties *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      pQueueFamilyProperties + 0,
      pQueueFamilyProperties + 1,
      pQueueFamilyProperties + 2,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);
}
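
/* The usual two-call enumeration pattern from the application side
 * (standard Vulkan usage, nothing RADV-specific):
 *
 *    uint32_t count = 0;
 *    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, NULL);
 *    VkQueueFamilyProperties *families = malloc(count * sizeof(*families));
 *    vkGetPhysicalDeviceQueueFamilyProperties(physical_device, &count, families);
 *    // families[0] is always the GFX family here; a second entry, when
 *    // present, is the async compute family.
 */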

void radv_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties,
      &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);
}

void radv_GetPhysicalDeviceMemoryProperties(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

   *pMemoryProperties = physical_device->memory_properties;
}

static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
   VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
   /* For all memory heaps, the budget is computed as follows:
    *
    *    heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (e.g.
    * in the presence of shared buffers).
    */
   unsigned mask = device->heaps;
   unsigned heap = 0;
   while (mask) {
      uint64_t internal_usage = 0, total_usage = 0;
      unsigned type = 1u << u_bit_scan(&mask);

      switch (type) {
      case RADV_HEAP_VRAM:
         internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
         total_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
         break;
      case RADV_HEAP_VRAM_VIS:
         internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
         if (!(device->heaps & RADV_HEAP_VRAM))
            internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
         total_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
         break;
      case RADV_HEAP_GTT:
         internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
         total_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
         break;
      }

      uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
                            MIN2(device->memory_properties.memoryHeaps[heap].size,
                                 total_usage);
      memoryBudget->heapBudget[heap] = free_space + internal_usage;
      memoryBudget->heapUsage[heap] = internal_usage;
      ++heap;
   }

   assert(heap == memory_properties->memoryHeapCount);

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}
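
/* A worked example of the budget formula above (numbers invented for
 * illustration): on an 8 GiB VRAM heap where all processes together have
 * 3 GiB resident and this device has allocated 1 GiB of that,
 *
 *    free_space = 8 GiB - MIN2(8 GiB, 3 GiB) = 5 GiB
 *    heapBudget = 5 GiB + 1 GiB              = 6 GiB
 *    heapUsage  = 1 GiB
 *
 * so the application learns it may grow by about 5 GiB beyond what it
 * already holds.
 */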

void radv_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   radv_GetPhysicalDeviceMemoryProperties(physicalDevice,
                                          &pMemoryProperties->memoryProperties);

   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
      vk_find_struct(pMemoryProperties->pNext,
                     PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
   if (memory_budget)
      radv_get_memory_budget_properties(physicalDevice, memory_budget);
}
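
/* A minimal sketch of the application-side query (requires the
 * VK_EXT_memory_budget device extension to be enabled):
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 mem_props2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(physical_device, &mem_props2);
 *    // budget.heapBudget[i] / budget.heapUsage[i] now hold the values
 *    // computed by radv_get_memory_budget_properties() above.
 */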

VkResult radv_GetMemoryHostPointerPropertiesEXT(
   VkDevice _device,
   VkExternalMemoryHandleTypeFlagBits handleType,
   const void *pHostPointer,
   VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType)
   {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
      const struct radv_physical_device *physical_device = device->physical_device;
      uint32_t memoryTypeBits = 0;
      for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
         if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
             !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
            memoryTypeBits = (1 << i);
            break;
         }
      }
      pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
      return VK_SUCCESS;
   }
   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
   /* Default to MEDIUM when a specific global priority isn't requested */
   if (!pObj)
      return RADEON_CTX_PRIORITY_MEDIUM;

   switch (pObj->globalPriority) {
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
      return RADEON_CTX_PRIORITY_REALTIME;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
      return RADEON_CTX_PRIORITY_HIGH;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
      return RADEON_CTX_PRIORITY_MEDIUM;
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
      return RADEON_CTX_PRIORITY_LOW;
   default:
      unreachable("Illegal global priority value");
      return RADEON_CTX_PRIORITY_INVALID;
   }
}
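
/* How an application requests a non-default priority (needs
 * VK_EXT_global_priority; sketch only):
 *
 *    float queue_priority = 1.0f;
 *    VkDeviceQueueGlobalPriorityCreateInfoEXT global_priority = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
 *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *       .pNext = &global_priority,
 *       .queueFamilyIndex = 0,
 *       .queueCount = 1,
 *       .pQueuePriorities = &queue_priority,
 *    };
 *
 * REALTIME (and sometimes HIGH) may require elevated privileges from the
 * kernel driver.
 */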

static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue,
                uint32_t queue_family_index, int idx,
                VkDeviceQueueCreateFlags flags,
                const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
   queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   queue->device = device;
   queue->queue_family_index = queue_family_index;
   queue->queue_idx = idx;
   queue->priority = radv_get_queue_global_priority(global_priority);
   queue->flags = flags;
   queue->hw_ctx = NULL;

   VkResult result = device->ws->ctx_create(device->ws, queue->priority, &queue->hw_ctx);
   if (result != VK_SUCCESS)
      return vk_error(device->instance, result);

   list_inithead(&queue->pending_submissions);
   pthread_mutex_init(&queue->pending_mutex, NULL);

   pthread_mutex_init(&queue->thread_mutex, NULL);
   queue->thread_submission = NULL;
   queue->thread_running = queue->thread_exit = false;
   result = radv_create_pthread_cond(&queue->thread_cond);
   if (result != VK_SUCCESS)
      return vk_error(device->instance, result);

   return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
   if (queue->thread_running) {
      p_atomic_set(&queue->thread_exit, true);
      pthread_cond_broadcast(&queue->thread_cond);
      pthread_join(queue->submission_thread, NULL);
   }
   pthread_cond_destroy(&queue->thread_cond);
   pthread_mutex_destroy(&queue->pending_mutex);
   pthread_mutex_destroy(&queue->thread_mutex);

   if (queue->hw_ctx)
      queue->device->ws->ctx_destroy(queue->hw_ctx);

   if (queue->initial_full_flush_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
   if (queue->initial_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_preamble_cs);
   if (queue->continue_preamble_cs)
      queue->device->ws->cs_destroy(queue->continue_preamble_cs);
   if (queue->descriptor_bo)
      queue->device->ws->buffer_destroy(queue->descriptor_bo);
   if (queue->scratch_bo)
      queue->device->ws->buffer_destroy(queue->scratch_bo);
   if (queue->esgs_ring_bo)
      queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
   if (queue->gsvs_ring_bo)
      queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
   if (queue->tess_rings_bo)
      queue->device->ws->buffer_destroy(queue->tess_rings_bo);
   if (queue->gds_bo)
      queue->device->ws->buffer_destroy(queue->gds_bo);
   if (queue->gds_oa_bo)
      queue->device->ws->buffer_destroy(queue->gds_oa_bo);
   if (queue->compute_scratch_bo)
      queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
}

static void
radv_bo_list_init(struct radv_bo_list *bo_list)
{
   u_rwlock_init(&bo_list->rwlock);
   bo_list->list.count = bo_list->capacity = 0;
   bo_list->list.bos = NULL;
}

static void
radv_bo_list_finish(struct radv_bo_list *bo_list)
{
   free(bo_list->list.bos);
   u_rwlock_destroy(&bo_list->rwlock);
}

VkResult radv_bo_list_add(struct radv_device *device,
                          struct radeon_winsys_bo *bo)
{
   struct radv_bo_list *bo_list = &device->bo_list;

   if (bo->is_local)
      return VK_SUCCESS;

   if (unlikely(!device->use_global_bo_list))
      return VK_SUCCESS;

   u_rwlock_wrlock(&bo_list->rwlock);
   if (bo_list->list.count == bo_list->capacity) {
      unsigned capacity = MAX2(4, bo_list->capacity * 2);
      void *data = realloc(bo_list->list.bos, capacity * sizeof(struct radeon_winsys_bo *));

      if (!data) {
         u_rwlock_wrunlock(&bo_list->rwlock);
         return VK_ERROR_OUT_OF_HOST_MEMORY;
      }

      bo_list->list.bos = (struct radeon_winsys_bo **)data;
      bo_list->capacity = capacity;
   }

   bo_list->list.bos[bo_list->list.count++] = bo;
   u_rwlock_wrunlock(&bo_list->rwlock);
   return VK_SUCCESS;
}

void radv_bo_list_remove(struct radv_device *device,
                         struct radeon_winsys_bo *bo)
{
   struct radv_bo_list *bo_list = &device->bo_list;

   if (bo->is_local)
      return;

   if (unlikely(!device->use_global_bo_list))
      return;

   u_rwlock_wrlock(&bo_list->rwlock);
   /* Loop the list backwards so we find the most recently added
    * memory first. */
   for (unsigned i = bo_list->list.count; i-- > 0;) {
      if (bo_list->list.bos[i] == bo) {
         bo_list->list.bos[i] = bo_list->list.bos[bo_list->list.count - 1];
         --bo_list->list.count;
         break;
      }
   }
   u_rwlock_wrunlock(&bo_list->rwlock);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
   device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
                                                  device->physical_device->rad_info.family);
}

static int radv_get_device_extension_index(const char *name)
{
   for (unsigned i = 0; i < RADV_DEVICE_EXTENSION_COUNT; ++i) {
      if (strcmp(name, radv_device_extensions[i].extensionName) == 0)
         return i;
   }
   return -1;
}

static int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}

static bool radv_thread_trace_enabled()
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}
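
/* Example of enabling these paths from the environment (the specific frame
 * number and trigger path are illustrative, and the exact capture semantics
 * of each variable are documented elsewhere):
 *
 *    RADV_THREAD_TRACE=100 ./app                  # value >= 0 enables tracing
 *    RADV_THREAD_TRACE_TRIGGER=/tmp/trigger ./app # non-empty path enables it
 *
 * Either condition makes radv_thread_trace_enabled() return true, which in
 * turn routes dispatch through the SQTT layer below.
 */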

static void
radv_device_init_dispatch(struct radv_device *device)
{
   const struct radv_instance *instance = device->physical_device->instance;
   const struct radv_device_dispatch_table *dispatch_table_layer = NULL;
   bool unchecked = instance->debug_flags & RADV_DEBUG_ALL_ENTRYPOINTS;

   if (radv_thread_trace_enabled()) {
      /* Use device entrypoints from the SQTT layer if enabled. */
      dispatch_table_layer = &sqtt_device_dispatch_table;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(device->dispatch.entrypoints); i++) {
      /* Vulkan requires that entrypoints for extensions which have not been
       * enabled must not be advertised.
       */
      if (!unchecked &&
          !radv_device_entrypoint_is_enabled(i, instance->apiVersion,
                                             &instance->enabled_extensions,
                                             &device->enabled_extensions)) {
         device->dispatch.entrypoints[i] = NULL;
      } else if (dispatch_table_layer &&
                 dispatch_table_layer->entrypoints[i]) {
         device->dispatch.entrypoints[i] =
            dispatch_table_layer->entrypoints[i];
      } else {
         device->dispatch.entrypoints[i] =
            radv_device_dispatch_table.entrypoints[i];
      }
   }
}

static VkResult
radv_create_pthread_cond(pthread_cond_t *cond)
{
   pthread_condattr_t condattr;
   if (pthread_condattr_init(&condattr)) {
      return VK_ERROR_INITIALIZATION_FAILED;
   }

   if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC)) {
      pthread_condattr_destroy(&condattr);
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   if (pthread_cond_init(cond, &condattr)) {
      pthread_condattr_destroy(&condattr);
      return VK_ERROR_INITIALIZATION_FAILED;
   }
   pthread_condattr_destroy(&condattr);
   return VK_SUCCESS;
}
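
/* Why the condattr dance above matters: a condition variable created this
 * way takes its timeout on CLOCK_MONOTONIC, so waiters are immune to
 * wall-clock jumps. A minimal sketch of waiting on such a condvar (the
 * mutex, condvar, and predicate are assumed to exist):
 *
 *    struct timespec ts;
 *    clock_gettime(CLOCK_MONOTONIC, &ts);
 *    ts.tv_sec += 1;   // 1 second deadline on the monotonic clock
 *
 *    pthread_mutex_lock(&mtx);
 *    while (!predicate) {
 *       if (pthread_cond_timedwait(&cond, &mtx, &ts) == ETIMEDOUT)
 *          break;
 *    }
 *    pthread_mutex_unlock(&mtx);
 */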

static VkResult
check_physical_device_features(VkPhysicalDevice physicalDevice,
                               const VkPhysicalDeviceFeatures *features)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
   VkPhysicalDeviceFeatures supported_features;
   radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
   VkBool32 *supported_feature = (VkBool32 *)&supported_features;
   VkBool32 *enabled_feature = (VkBool32 *)features;
   unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
   for (uint32_t i = 0; i < num_features; i++) {
      if (enabled_feature[i] && !supported_feature[i])
         return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
   }

   return VK_SUCCESS;
}

static VkResult radv_device_init_border_color(struct radv_device *device)
{
   device->border_color_data.bo =
      device->ws->buffer_create(device->ws,
                                RADV_BORDER_COLOR_BUFFER_SIZE,
                                4096,
                                RADEON_DOMAIN_VRAM,
                                RADEON_FLAG_CPU_ACCESS |
                                RADEON_FLAG_READ_ONLY |
                                RADEON_FLAG_NO_INTERPROCESS_SHARING,
                                RADV_BO_PRIORITY_SHADER);

   if (device->border_color_data.bo == NULL)
      return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   device->border_color_data.colors_gpu_ptr =
      device->ws->buffer_map(device->border_color_data.bo);
   if (!device->border_color_data.colors_gpu_ptr)
      return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   pthread_mutex_init(&device->border_color_data.mutex, NULL);

   return VK_SUCCESS;
}

static void radv_device_finish_border_color(struct radv_device *device)
{
   if (device->border_color_data.bo) {
      device->ws->buffer_destroy(device->border_color_data.bo);

      pthread_mutex_destroy(&device->border_color_data.mutex);
   }
}

VkResult
_radv_device_set_lost(struct radv_device *device,
                      const char *file, int line,
                      const char *msg, ...)
{
   VkResult err;
   va_list ap;

   p_atomic_inc(&device->lost);

   va_start(ap, msg);
   err = __vk_errorv(device->physical_device->instance, device,
                     VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
                     VK_ERROR_DEVICE_LOST, file, line, msg, ap);
   va_end(ap);

   return err;
}
2661
radv_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)2662 VkResult radv_CreateDevice(
2663 VkPhysicalDevice physicalDevice,
2664 const VkDeviceCreateInfo* pCreateInfo,
2665 const VkAllocationCallbacks* pAllocator,
2666 VkDevice* pDevice)
2667 {
2668 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2669 VkResult result;
2670 struct radv_device *device;
2671
2672 bool keep_shader_info = false;
2673 bool robust_buffer_access = false;
2674 bool overallocation_disallowed = false;
2675 bool custom_border_colors = false;
2676
2677 /* Check enabled features */
2678 if (pCreateInfo->pEnabledFeatures) {
2679 result = check_physical_device_features(physicalDevice,
2680 pCreateInfo->pEnabledFeatures);
2681 if (result != VK_SUCCESS)
2682 return result;
2683
2684 if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2685 robust_buffer_access = true;
2686 }
2687
2688 vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2689 switch (ext->sType) {
2690 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2691 const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2692 result = check_physical_device_features(physicalDevice,
2693 &features->features);
2694 if (result != VK_SUCCESS)
2695 return result;
2696
2697 if (features->features.robustBufferAccess)
2698 robust_buffer_access = true;
2699 break;
2700 }
2701 case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
2702 const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
2703 if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
2704 overallocation_disallowed = true;
2705 break;
2706 }
2707 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2708 const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
2709 custom_border_colors = border_color_features->customBorderColors;
2710 break;
2711 }
2712 default:
2713 break;
2714 }
2715 }
2716
2717 device = vk_zalloc2(&physical_device->instance->alloc, pAllocator,
2718 sizeof(*device), 8,
2719 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2720 if (!device)
2721 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2722
2723 vk_device_init(&device->vk, pCreateInfo,
2724 &physical_device->instance->alloc, pAllocator);
2725
2726 device->instance = physical_device->instance;
2727 device->physical_device = physical_device;
2728
2729 device->ws = physical_device->ws;
2730
2731 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
2732 const char *ext_name = pCreateInfo->ppEnabledExtensionNames[i];
2733 int index = radv_get_device_extension_index(ext_name);
2734 if (index < 0 || !physical_device->supported_extensions.extensions[index]) {
2735 vk_free(&device->vk.alloc, device);
2736 return vk_error(physical_device->instance, VK_ERROR_EXTENSION_NOT_PRESENT);
2737 }
2738
2739 device->enabled_extensions.extensions[index] = true;
2740 }
2741
2742 radv_device_init_dispatch(device);
2743
2744 keep_shader_info = device->enabled_extensions.AMD_shader_info;
2745
2746 /* With update after bind we can't attach bo's to the command buffer
2747 * from the descriptor set anymore, so we have to use a global BO list.
2748 */
2749 device->use_global_bo_list =
2750 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
2751 device->enabled_extensions.EXT_descriptor_indexing ||
2752 device->enabled_extensions.EXT_buffer_device_address ||
2753 device->enabled_extensions.KHR_buffer_device_address;
2754
2755 device->robust_buffer_access = robust_buffer_access;
2756
2757 mtx_init(&device->shader_slab_mutex, mtx_plain);
2758 list_inithead(&device->shader_slabs);
2759
2760 device->overallocation_disallowed = overallocation_disallowed;
2761 mtx_init(&device->overallocation_mutex, mtx_plain);
2762
2763 radv_bo_list_init(&device->bo_list);
2764
2765 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2766 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
2767 uint32_t qfi = queue_create->queueFamilyIndex;
2768 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
2769 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
2770
2771 assert(!global_priority || device->physical_device->rad_info.has_ctx_priority);
2772
2773 device->queues[qfi] = vk_alloc(&device->vk.alloc,
2774 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2775 if (!device->queues[qfi]) {
2776 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2777 goto fail;
2778 }
2779
2780 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
2781
2782 device->queue_count[qfi] = queue_create->queueCount;
2783
2784 for (unsigned q = 0; q < queue_create->queueCount; q++) {
2785 result = radv_queue_init(device, &device->queues[qfi][q],
2786 qfi, q, queue_create->flags,
2787 global_priority);
2788 if (result != VK_SUCCESS)
2789 goto fail;
2790 }
2791 }
2792
2793 device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
2794 !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
2795
2796 /* Disable DFSM by default. As of 2019-09-15 Talos on Low is still 3% slower on Raven. */
2797 device->dfsm_allowed = device->pbb_allowed &&
2798 (device->instance->perftest_flags & RADV_PERFTEST_DFSM);
2799
2800 device->always_use_syncobj = device->physical_device->rad_info.has_syncobj_wait_for_submit;
2801
2802 /* The maximum number of scratch waves. Scratch space isn't divided
2803 * evenly between CUs. The number is only a function of the number of CUs.
2804 * We can decrease the constant to decrease the scratch buffer size.
2805 *
2806 * scratch_waves must be >= the maximum possible size of
2807 * 1 threadgroup, so that the hw doesn't hang from being unable
2808 * to start any.
2809 *
2810 * The recommended value is 4 per CU at most. Higher numbers don't
2811 * bring much benefit, but they still occupy chip resources (think
2812 * async compute). I've seen ~2% performance difference between 4 and 32.
2813 */
2814 uint32_t max_threads_per_block = 2048;
2815 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_good_compute_units,
2816 max_threads_per_block / 64);
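/* Worked example (hypothetical part with 40 CUs): MAX2(32 * 40, 2048 / 64)
 * = MAX2(1280, 32) = 1280 scratch waves, i.e. 32 per CU; the second operand
 * only matters for chips with very few CUs.
 */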
2817
2818 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
2819
2820 if (device->physical_device->rad_info.chip_class >= GFX7) {
2821 /* If the KMD allows it (there is a hw register the KMD controls
2822 * for this), allow launching waves out-of-order.
2823 */
2824 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
2825 }
2826
2827 radv_device_init_gs_info(device);
2828
2829 device->tess_offchip_block_dw_size =
2830 device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
2831
2832 if (getenv("RADV_TRACE_FILE")) {
2833 fprintf(stderr, "***********************************************************************************\n");
2834 fprintf(stderr, "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
2835 fprintf(stderr, "***********************************************************************************\n");
2836 abort();
2837 }
2838
2839 if (device->instance->debug_flags & RADV_DEBUG_HANG) {
2840 /* Enable GPU hangs detection and dump logs if a GPU hang is
2841 * detected.
2842 */
2843 keep_shader_info = true;
2844
2845 if (!radv_init_trace(device)) {
2846 /* Make sure the fail path doesn't return a stale VK_SUCCESS. */
result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
2847
2848 fprintf(stderr, "*****************************************************************************\n");
2849 fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
2850 fprintf(stderr, "*****************************************************************************\n");
2851
2852 /* Wait for idle after every draw/dispatch to identify the
2853 * first bad call.
2854 */
2855 device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
2856
2857 radv_dump_enabled_options(device, stderr);
2858 }
2859
2860 if (radv_thread_trace_enabled()) {
2861 fprintf(stderr, "*************************************************\n");
2862 fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
2863 fprintf(stderr, "*************************************************\n");
2864
2865 if (device->physical_device->rad_info.chip_class < GFX8) {
2866 fprintf(stderr, "GPU hardware not supported: refer to "
2867 "the RGP documentation for the list of "
2868 "supported GPUs!\n");
2869 abort();
2870 }
2871
2872 if (device->physical_device->rad_info.chip_class > GFX10) {
2873 fprintf(stderr, "radv: Thread trace is not supported "
2874 "for that GPU!\n");
2875 exit(1);
2876 }
2877
2878 /* Default buffer size set to 1MB per SE. */
2879 device->thread_trace_buffer_size =
2880 radv_get_int_debug_option("RADV_THREAD_TRACE_BUFFER_SIZE", 1024 * 1024);
2881 device->thread_trace_start_frame = radv_get_int_debug_option("RADV_THREAD_TRACE", -1);
2882
2883 const char *trigger_file = getenv("RADV_THREAD_TRACE_TRIGGER");
2884 if (trigger_file)
2885 device->thread_trace_trigger_file = strdup(trigger_file);
2886
2887 if (!radv_thread_trace_init(device)) {
2888 result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
2889 }
2890
2891 if (getenv("RADV_TRAP_HANDLER")) {
2892 /* TODO: Add support for more hardware. */
2893 assert(device->physical_device->rad_info.chip_class == GFX8);
2894
2895 fprintf(stderr, "**********************************************************************\n");
2896 fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
2897 fprintf(stderr, "**********************************************************************\n");
2898
2899 /* To get the disassembly of the faulty shaders, we have to
2900 * keep some shader info around.
2901 */
2902 keep_shader_info = true;
2903
2904 if (!radv_trap_handler_init(device)) {
2905 result = VK_ERROR_INITIALIZATION_FAILED;
goto fail;
}
2906 }
2907
2908 device->keep_shader_info = keep_shader_info;
2909 result = radv_device_init_meta(device);
2910 if (result != VK_SUCCESS)
2911 goto fail;
2912
2913 radv_device_init_msaa(device);
2914
2915 /* If the border color extension is enabled, let's create the buffer we need. */
2916 if (custom_border_colors) {
2917 result = radv_device_init_border_color(device);
2918 if (result != VK_SUCCESS)
2919 goto fail;
2920 }
2921
2922 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
2923 device->empty_cs[family] = device->ws->cs_create(device->ws, family);
2924 if (!device->empty_cs[family]) {
2925 result = VK_ERROR_OUT_OF_HOST_MEMORY;
goto fail;
}
2926
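/* Even an "empty" submission needs a valid IB, so emit a minimal packet
 * sequence per ring: CONTEXT_CONTROL on the gfx ring, a single NOP on
 * compute.
 */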
2927 switch (family) {
2928 case RADV_QUEUE_GENERAL:
2929 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
2930 radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
2931 radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
2932 break;
2933 case RADV_QUEUE_COMPUTE:
2934 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
2935 radeon_emit(device->empty_cs[family], 0);
2936 break;
2937 }
2938
2939 result = device->ws->cs_finalize(device->empty_cs[family]);
2940 if (result != VK_SUCCESS)
2941 goto fail;
2942 }
2943
2944 if (device->physical_device->rad_info.chip_class >= GFX7)
2945 cik_create_gfx_config(device);
2946
2947 VkPipelineCacheCreateInfo ci;
2948 ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
2949 ci.pNext = NULL;
2950 ci.flags = 0;
2951 ci.pInitialData = NULL;
2952 ci.initialDataSize = 0;
2953 VkPipelineCache pc;
2954 result = radv_CreatePipelineCache(radv_device_to_handle(device),
2955 &ci, NULL, &pc);
2956 if (result != VK_SUCCESS)
2957 goto fail_meta;
2958
2959 device->mem_cache = radv_pipeline_cache_from_handle(pc);
2960
2961 result = radv_create_pthread_cond(&device->timeline_cond);
2962 if (result != VK_SUCCESS)
2963 goto fail_mem_cache;
2964
2965 device->force_aniso =
2966 MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
2967 if (device->force_aniso >= 0) {
2968 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
2969 1 << util_logbase2(device->force_aniso));
2970 }
2971
2972 *pDevice = radv_device_to_handle(device);
2973 return VK_SUCCESS;
2974
2975 fail_mem_cache:
2976 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
2977 fail_meta:
2978 radv_device_finish_meta(device);
2979 fail:
2980 radv_bo_list_finish(&device->bo_list);
2981
2982 radv_thread_trace_finish(device);
2983 free(device->thread_trace_trigger_file);
2984
2985 radv_trap_handler_finish(device);
2986
2987 if (device->trace_bo)
2988 device->ws->buffer_destroy(device->trace_bo);
2989
2990 if (device->gfx_init)
2991 device->ws->buffer_destroy(device->gfx_init);
2992
2993 radv_device_finish_border_color(device);
2994
2995 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
2996 for (unsigned q = 0; q < device->queue_count[i]; q++)
2997 radv_queue_finish(&device->queues[i][q]);
2998 if (device->queue_count[i])
2999 vk_free(&device->vk.alloc, device->queues[i]);
3000 }
3001
3002 vk_free(&device->vk.alloc, device);
3003 return result;
3004 }
3005
3006 void radv_DestroyDevice(
3007 VkDevice _device,
3008 const VkAllocationCallbacks* pAllocator)
3009 {
3010 RADV_FROM_HANDLE(radv_device, device, _device);
3011
3012 if (!device)
3013 return;
3014
3015 if (device->trace_bo)
3016 device->ws->buffer_destroy(device->trace_bo);
3017
3018 if (device->gfx_init)
3019 device->ws->buffer_destroy(device->gfx_init);
3020
3021 radv_device_finish_border_color(device);
3022
3023 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3024 for (unsigned q = 0; q < device->queue_count[i]; q++)
3025 radv_queue_finish(&device->queues[i][q]);
3026 if (device->queue_count[i])
3027 vk_free(&device->vk.alloc, device->queues[i]);
3028 if (device->empty_cs[i])
3029 device->ws->cs_destroy(device->empty_cs[i]);
3030 }
3031 radv_device_finish_meta(device);
3032
3033 VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
3034 radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3035
3036 radv_trap_handler_finish(device);
3037
3038 radv_destroy_shader_slabs(device);
3039
3040 pthread_cond_destroy(&device->timeline_cond);
3041 radv_bo_list_finish(&device->bo_list);
3042
3043 free(device->thread_trace_trigger_file);
3044 radv_thread_trace_finish(device);
3045
3046 vk_free(&device->vk.alloc, device);
3047 }
3048
3049 VkResult radv_EnumerateInstanceLayerProperties(
3050 uint32_t* pPropertyCount,
3051 VkLayerProperties* pProperties)
3052 {
3053 if (pProperties == NULL) {
3054 *pPropertyCount = 0;
3055 return VK_SUCCESS;
3056 }
3057
3058 /* None supported at this time */
3059 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3060 }
3061
3062 VkResult radv_EnumerateDeviceLayerProperties(
3063 VkPhysicalDevice physicalDevice,
3064 uint32_t* pPropertyCount,
3065 VkLayerProperties* pProperties)
3066 {
3067 if (pProperties == NULL) {
3068 *pPropertyCount = 0;
3069 return VK_SUCCESS;
3070 }
3071
3072 /* None supported at this time */
3073 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3074 }
3075
3076 void radv_GetDeviceQueue2(
3077 VkDevice _device,
3078 const VkDeviceQueueInfo2* pQueueInfo,
3079 VkQueue* pQueue)
3080 {
3081 RADV_FROM_HANDLE(radv_device, device, _device);
3082 struct radv_queue *queue;
3083
3084 queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
3085 if (pQueueInfo->flags != queue->flags) {
3086 /* From the Vulkan 1.1.70 spec:
3087 *
3088 * "The queue returned by vkGetDeviceQueue2 must have the same
3089 * flags value from this structure as that used at device
3090 * creation time in a VkDeviceQueueCreateInfo instance. If no
3091 * matching flags were specified at device creation time then
3092 * pQueue will return VK_NULL_HANDLE."
3093 */
3094 *pQueue = VK_NULL_HANDLE;
3095 return;
3096 }
3097
3098 *pQueue = radv_queue_to_handle(queue);
3099 }
3100
3101 void radv_GetDeviceQueue(
3102 VkDevice _device,
3103 uint32_t queueFamilyIndex,
3104 uint32_t queueIndex,
3105 VkQueue* pQueue)
3106 {
3107 const VkDeviceQueueInfo2 info = (VkDeviceQueueInfo2) {
3108 .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
3109 .queueFamilyIndex = queueFamilyIndex,
3110 .queueIndex = queueIndex
3111 };
3112
3113 radv_GetDeviceQueue2(_device, &info, pQueue);
3114 }
3115
3116 static void
3117 fill_geom_tess_rings(struct radv_queue *queue,
3118 uint32_t *map,
3119 bool add_sample_positions,
3120 uint32_t esgs_ring_size,
3121 struct radeon_winsys_bo *esgs_ring_bo,
3122 uint32_t gsvs_ring_size,
3123 struct radeon_winsys_bo *gsvs_ring_bo,
3124 uint32_t tess_factor_ring_size,
3125 uint32_t tess_offchip_ring_offset,
3126 uint32_t tess_offchip_ring_size,
3127 struct radeon_winsys_bo *tess_rings_bo)
3128 {
3129 uint32_t *desc = &map[4];
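/* map[0..1] hold the graphics scratch buffer descriptor (written by the
 * caller in radv_get_preamble_cs) and map[2..3] are padding, so the ring
 * descriptors built below start 16 bytes into the descriptor BO.
 */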
3130
3131 if (esgs_ring_bo) {
3132 uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3133
3134 /* stride 0, num records - size, add tid, swizzle, elsize4,
3135 index stride 64 */
3136 desc[0] = esgs_va;
3137 desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) |
3138 S_008F04_SWIZZLE_ENABLE(true);
3139 desc[2] = esgs_ring_size;
3140 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3141 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3142 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3143 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3144 S_008F0C_INDEX_STRIDE(3) |
3145 S_008F0C_ADD_TID_ENABLE(1);
3146
3147 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3148 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3149 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3150 S_008F0C_RESOURCE_LEVEL(1);
3151 } else {
3152 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3153 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3154 S_008F0C_ELEMENT_SIZE(1);
3155 }
3156
3157 /* GS entry for ES->GS ring */
3158 /* stride 0, num records - size, elsize0,
3159 index stride 0 */
3160 desc[4] = esgs_va;
3161 desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3162 desc[6] = esgs_ring_size;
3163 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3164 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3165 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3166 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3167
3168 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3169 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3170 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3171 S_008F0C_RESOURCE_LEVEL(1);
3172 } else {
3173 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3174 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3175 }
3176 }
3177
3178 desc += 8;
3179
3180 if (gsvs_ring_bo) {
3181 uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3182
3183 /* VS entry for GS->VS ring */
3184 /* stride 0, num records - size, elsize0,
3185 index stride 0 */
3186 desc[0] = gsvs_va;
3187 desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3188 desc[2] = gsvs_ring_size;
3189 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3190 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3191 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3192 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3193
3194 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3195 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3196 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3197 S_008F0C_RESOURCE_LEVEL(1);
3198 } else {
3199 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3200 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3201 }
3202
3203 /* stride gsvs_itemsize, num records 64
3204 elsize 4, index stride 16 */
3205 /* shader will patch stride and desc[2] */
3206 desc[4] = gsvs_va;
3207 desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) |
3208 S_008F04_SWIZZLE_ENABLE(1);
3209 desc[6] = 0;
3210 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3211 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3212 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3213 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3214 S_008F0C_INDEX_STRIDE(1) |
3215 S_008F0C_ADD_TID_ENABLE(true);
3216
3217 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3218 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3219 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) |
3220 S_008F0C_RESOURCE_LEVEL(1);
3221 } else {
3222 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3223 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) |
3224 S_008F0C_ELEMENT_SIZE(1);
3225 }
3226
3227 }
3228
3229 desc += 8;
3230
3231 if (tess_rings_bo) {
3232 uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3233 uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
3234
3235 desc[0] = tess_va;
3236 desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3237 desc[2] = tess_factor_ring_size;
3238 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3239 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3240 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3241 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3242
3243 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3244 desc[3] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3245 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3246 S_008F0C_RESOURCE_LEVEL(1);
3247 } else {
3248 desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3249 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3250 }
3251
3252 desc[4] = tess_offchip_va;
3253 desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3254 desc[6] = tess_offchip_ring_size;
3255 desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
3256 S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3257 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
3258 S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3259
3260 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3261 desc[7] |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
3262 S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
3263 S_008F0C_RESOURCE_LEVEL(1);
3264 } else {
3265 desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3266 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3267 }
3268 }
3269
3270 desc += 8;
3271
3272 if (add_sample_positions) {
3273 /* add sample positions after all rings */
3274 memcpy(desc, queue->device->sample_locations_1x, 8);
3275 desc += 2;
3276 memcpy(desc, queue->device->sample_locations_2x, 16);
3277 desc += 4;
3278 memcpy(desc, queue->device->sample_locations_4x, 32);
3279 desc += 8;
3280 memcpy(desc, queue->device->sample_locations_8x, 64);
3281 }
3282 }
3283
3284 static unsigned
3285 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
3286 {
3287 bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
3288 device->physical_device->rad_info.family != CHIP_CARRIZO &&
3289 device->physical_device->rad_info.family != CHIP_STONEY;
3290 unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
3291 unsigned max_offchip_buffers;
3292 unsigned offchip_granularity;
3293 unsigned hs_offchip_param;
3294
3295 /*
3296 * Per RadeonSI:
3297 * This must be one less than the maximum number due to a hw limitation.
3298 * Various hardware bugs need this.
3299 *
3300 * Per AMDVLK:
3301 * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3302 * Gfx7 should limit max_offchip_buffers to 508
3303 * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3304 *
3305 * Follow AMDVLK here.
3306 */
3307 if (device->physical_device->rad_info.chip_class >= GFX10) {
3308 max_offchip_buffers_per_se = 256;
3309 } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
3310 device->physical_device->rad_info.chip_class == GFX7 ||
3311 device->physical_device->rad_info.chip_class == GFX6)
3312 --max_offchip_buffers_per_se;
3313
3314 max_offchip_buffers = max_offchip_buffers_per_se *
3315 device->physical_device->rad_info.max_se;
3316
3317 /* Hawaii has a bug with offchip buffers > 256 that can be worked
3318 * around by setting 4K granularity.
3319 */
3320 if (device->tess_offchip_block_dw_size == 4096) {
3321 assert(device->physical_device->rad_info.family == CHIP_HAWAII);
3322 offchip_granularity = V_03093C_X_4K_DWORDS;
3323 } else {
3324 assert(device->tess_offchip_block_dw_size == 8192);
3325 offchip_granularity = V_03093C_X_8K_DWORDS;
3326 }
3327
3328 switch (device->physical_device->rad_info.chip_class) {
3329 case GFX6:
3330 max_offchip_buffers = MIN2(max_offchip_buffers, 126);
3331 break;
3332 case GFX7:
3333 case GFX8:
3334 case GFX9:
3335 max_offchip_buffers = MIN2(max_offchip_buffers, 508);
3336 break;
3337 case GFX10:
3338 break;
3339 default:
3340 break;
3341 }
3342
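/* Worked example: Vega10 (GFX9, 4 SEs) gets 127 buffers per SE, i.e.
 * 4 * 127 = 508 in total; the GFX8+ path below then programs
 * OFFCHIP_BUFFERING with 507, since that field encodes count minus one
 * on those chips.
 */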
3343 *max_offchip_buffers_p = max_offchip_buffers;
3344 if (device->physical_device->rad_info.chip_class >= GFX10_3) {
3345 hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
3346 S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
3347 } else if (device->physical_device->rad_info.chip_class >= GFX7) {
3348 if (device->physical_device->rad_info.chip_class >= GFX8)
3349 --max_offchip_buffers;
3350 hs_offchip_param =
3351 S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
3352 S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
3353 } else {
3354 hs_offchip_param =
3355 S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
3356 }
3357 return hs_offchip_param;
3358 }
3359
3360 static void
3361 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3362 struct radeon_winsys_bo *esgs_ring_bo,
3363 uint32_t esgs_ring_size,
3364 struct radeon_winsys_bo *gsvs_ring_bo,
3365 uint32_t gsvs_ring_size)
3366 {
3367 if (!esgs_ring_bo && !gsvs_ring_bo)
3368 return;
3369
3370 if (esgs_ring_bo)
3371 radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3372
3373 if (gsvs_ring_bo)
3374 radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3375
3376 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3377 radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3378 radeon_emit(cs, esgs_ring_size >> 8);
3379 radeon_emit(cs, gsvs_ring_size >> 8);
3380 } else {
3381 radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3382 radeon_emit(cs, esgs_ring_size >> 8);
3383 radeon_emit(cs, gsvs_ring_size >> 8);
3384 }
3385 }
3386
3387 static void
3388 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3389 unsigned hs_offchip_param, unsigned tf_ring_size,
3390 struct radeon_winsys_bo *tess_rings_bo)
3391 {
3392 uint64_t tf_va;
3393
3394 if (!tess_rings_bo)
3395 return;
3396
3397 tf_va = radv_buffer_get_va(tess_rings_bo);
3398
3399 radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
3400
3401 if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3402 radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE,
3403 S_030938_SIZE(tf_ring_size / 4));
3404 radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE,
3405 tf_va >> 8);
3406
3407 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3408 radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
3409 S_030984_BASE_HI(tf_va >> 40));
3410 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3411 radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI,
3412 S_030944_BASE_HI(tf_va >> 40));
3413 }
3414 radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM,
3415 hs_offchip_param);
3416 } else {
3417 radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE,
3418 S_008988_SIZE(tf_ring_size / 4));
3419 radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE,
3420 tf_va >> 8);
3421 radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM,
3422 hs_offchip_param);
3423 }
3424 }
3425
3426 static void
3427 radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3428 uint32_t size_per_wave, uint32_t waves,
3429 struct radeon_winsys_bo *scratch_bo)
3430 {
3431 if (queue->queue_family_index != RADV_QUEUE_GENERAL)
3432 return;
3433
3434 if (!scratch_bo)
3435 return;
3436
3437 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3438
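/* WAVESIZE is in units of 256 dwords (1024 bytes); round_up_u32 converts
 * the per-wave byte size into that granularity, rounding up.
 */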
3439 radeon_set_context_reg(cs, R_0286E8_SPI_TMPRING_SIZE,
3440 S_0286E8_WAVES(waves) |
3441 S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3442 }
3443
3444 static void
3445 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3446 uint32_t size_per_wave, uint32_t waves,
3447 struct radeon_winsys_bo *compute_scratch_bo)
3448 {
3449 uint64_t scratch_va;
3450
3451 if (!compute_scratch_bo)
3452 return;
3453
3454 scratch_va = radv_buffer_get_va(compute_scratch_bo);
3455
3456 radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
3457
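/* The scratch descriptor (VA plus a swizzled rsrc1 word) is handed to
 * compute shaders through the first two COMPUTE_USER_DATA SGPRs.
 */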
3458 radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
3459 radeon_emit(cs, scratch_va);
3460 radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3461 S_008F04_SWIZZLE_ENABLE(1));
3462
3463 radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
3464 S_00B860_WAVES(waves) |
3465 S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3466 }
3467
3468 static void
3469 radv_emit_global_shader_pointers(struct radv_queue *queue,
3470 struct radeon_cmdbuf *cs,
3471 struct radeon_winsys_bo *descriptor_bo)
3472 {
3473 uint64_t va;
3474
3475 if (!descriptor_bo)
3476 return;
3477
3478 va = radv_buffer_get_va(descriptor_bo);
3479
3480 radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
3481
3482 if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3483 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3484 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3485 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3486 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3487
3488 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3489 radv_emit_shader_pointer(queue->device, cs, regs[i],
3490 va, true);
3491 }
3492 } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3493 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3494 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3495 R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3496 R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3497
3498 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3499 radv_emit_shader_pointer(queue->device, cs, regs[i],
3500 va, true);
3501 }
3502 } else {
3503 uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0,
3504 R_00B130_SPI_SHADER_USER_DATA_VS_0,
3505 R_00B230_SPI_SHADER_USER_DATA_GS_0,
3506 R_00B330_SPI_SHADER_USER_DATA_ES_0,
3507 R_00B430_SPI_SHADER_USER_DATA_HS_0,
3508 R_00B530_SPI_SHADER_USER_DATA_LS_0};
3509
3510 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3511 radv_emit_shader_pointer(queue->device, cs, regs[i],
3512 va, true);
3513 }
3514 }
3515 }
3516
3517 static void
3518 radv_emit_trap_handler(struct radv_queue *queue,
3519 struct radeon_cmdbuf *cs,
3520 struct radeon_winsys_bo *tma_bo)
3521 {
3522 struct radv_device *device = queue->device;
3523 struct radeon_winsys_bo *tba_bo;
3524 uint64_t tba_va, tma_va;
3525
3526 if (!device->trap_handler_shader || !tma_bo)
3527 return;
3528
3529 tba_bo = device->trap_handler_shader->bo;
3530
3531 tba_va = radv_buffer_get_va(tba_bo) + device->trap_handler_shader->bo_offset;
3532 tma_va = radv_buffer_get_va(tma_bo);
3533
3534 radv_cs_add_buffer(queue->device->ws, cs, tba_bo);
3535 radv_cs_add_buffer(queue->device->ws, cs, tma_bo);
3536
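/* TBA/TMA take 256-byte-aligned addresses split across two dwords: the LO
 * register holds VA bits [39:8] and HI holds bits [63:40], matching the
 * >> 8 and >> 40 shifts below.
 */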
3537 if (queue->queue_family_index == RADV_QUEUE_GENERAL) {
3538 uint32_t regs[] = {R_00B000_SPI_SHADER_TBA_LO_PS,
3539 R_00B100_SPI_SHADER_TBA_LO_VS,
3540 R_00B200_SPI_SHADER_TBA_LO_GS,
3541 R_00B300_SPI_SHADER_TBA_LO_ES,
3542 R_00B400_SPI_SHADER_TBA_LO_HS,
3543 R_00B500_SPI_SHADER_TBA_LO_LS};
3544
3545 for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3546 radeon_set_sh_reg_seq(cs, regs[i], 4);
3547 radeon_emit(cs, tba_va >> 8);
3548 radeon_emit(cs, tba_va >> 40);
3549 radeon_emit(cs, tma_va >> 8);
3550 radeon_emit(cs, tma_va >> 40);
3551 }
3552 } else {
3553 radeon_set_sh_reg_seq(cs, R_00B838_COMPUTE_TBA_LO, 4);
3554 radeon_emit(cs, tba_va >> 8);
3555 radeon_emit(cs, tba_va >> 40);
3556 radeon_emit(cs, tma_va >> 8);
3557 radeon_emit(cs, tma_va >> 40);
3558 }
3559 }
3560
3561 static void
3562 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3563 {
3564 struct radv_device *device = queue->device;
3565
3566 if (device->gfx_init) {
3567 uint64_t va = radv_buffer_get_va(device->gfx_init);
3568
3569 radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
3570 radeon_emit(cs, va);
3571 radeon_emit(cs, va >> 32);
3572 radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
3573
3574 radv_cs_add_buffer(device->ws, cs, device->gfx_init);
3575 } else {
3576 si_emit_graphics(device, cs);
3577 }
3578 }
3579
3580 static void
3581 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3582 {
3583 si_emit_compute(queue->device, cs);
3584 }
3585
3586 static VkResult
3587 radv_get_preamble_cs(struct radv_queue *queue,
3588 uint32_t scratch_size_per_wave,
3589 uint32_t scratch_waves,
3590 uint32_t compute_scratch_size_per_wave,
3591 uint32_t compute_scratch_waves,
3592 uint32_t esgs_ring_size,
3593 uint32_t gsvs_ring_size,
3594 bool needs_tess_rings,
3595 bool needs_gds,
3596 bool needs_gds_oa,
3597 bool needs_sample_positions,
3598 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
3599 struct radeon_cmdbuf **initial_preamble_cs,
3600 struct radeon_cmdbuf **continue_preamble_cs)
3601 {
3602 struct radeon_winsys_bo *scratch_bo = NULL;
3603 struct radeon_winsys_bo *descriptor_bo = NULL;
3604 struct radeon_winsys_bo *compute_scratch_bo = NULL;
3605 struct radeon_winsys_bo *esgs_ring_bo = NULL;
3606 struct radeon_winsys_bo *gsvs_ring_bo = NULL;
3607 struct radeon_winsys_bo *tess_rings_bo = NULL;
3608 struct radeon_winsys_bo *gds_bo = NULL;
3609 struct radeon_winsys_bo *gds_oa_bo = NULL;
3610 struct radeon_cmdbuf *dest_cs[3] = {0};
3611 bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
3612 unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
3613 unsigned max_offchip_buffers;
3614 unsigned hs_offchip_param = 0;
3615 unsigned tess_offchip_ring_offset;
3616 uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
3617 if (!queue->has_tess_rings) {
3618 if (needs_tess_rings)
3619 add_tess_rings = true;
3620 }
3621 if (!queue->has_gds) {
3622 if (needs_gds)
3623 add_gds = true;
3624 }
3625 if (!queue->has_gds_oa) {
3626 if (needs_gds_oa)
3627 add_gds_oa = true;
3628 }
3629 if (!queue->has_sample_positions) {
3630 if (needs_sample_positions)
3631 add_sample_positions = true;
3632 }
3633 tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
3634 hs_offchip_param = radv_get_hs_offchip_param(queue->device,
3635 &max_offchip_buffers);
3636 tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
3637 tess_offchip_ring_size = max_offchip_buffers *
3638 queue->device->tess_offchip_block_dw_size * 4;
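/* Example sizing for a 4-SE chip: the tess factor ring is 4 * 32K = 128 KiB,
 * already 64 KiB aligned, so the off-chip ring starts at offset 128 KiB;
 * with 508 off-chip buffers of 8192 dwords each, the off-chip ring adds
 * 508 * 8192 * 4 bytes (~15.9 MiB).
 */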
3639
3640 scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
3641 if (scratch_size_per_wave)
3642 scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
3643 else
3644 scratch_waves = 0;
3645
3646 compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
3647 if (compute_scratch_size_per_wave)
3648 compute_scratch_waves = MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
3649 else
3650 compute_scratch_waves = 0;
3651
3652 if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
3653 scratch_waves <= queue->scratch_waves &&
3654 compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
3655 compute_scratch_waves <= queue->compute_scratch_waves &&
3656 esgs_ring_size <= queue->esgs_ring_size &&
3657 gsvs_ring_size <= queue->gsvs_ring_size &&
3658 !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
3659 queue->initial_preamble_cs) {
3660 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3661 *initial_preamble_cs = queue->initial_preamble_cs;
3662 *continue_preamble_cs = queue->continue_preamble_cs;
3663 if (!scratch_size_per_wave && !compute_scratch_size_per_wave &&
3664 !esgs_ring_size && !gsvs_ring_size && !needs_tess_rings &&
3665 !needs_gds && !needs_gds_oa && !needs_sample_positions)
3666 *continue_preamble_cs = NULL;
3667 return VK_SUCCESS;
3668 }
3669
3670 uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
3671 uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
3672 if (scratch_size > queue_scratch_size) {
3673 scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3674 scratch_size,
3675 4096,
3676 RADEON_DOMAIN_VRAM,
3677 ring_bo_flags,
3678 RADV_BO_PRIORITY_SCRATCH);
3679 if (!scratch_bo)
3680 goto fail;
3681 } else
3682 scratch_bo = queue->scratch_bo;
3683
3684 uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
3685 uint32_t compute_queue_scratch_size = queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
3686 if (compute_scratch_size > compute_queue_scratch_size) {
3687 compute_scratch_bo = queue->device->ws->buffer_create(queue->device->ws,
3688 compute_scratch_size,
3689 4096,
3690 RADEON_DOMAIN_VRAM,
3691 ring_bo_flags,
3692 RADV_BO_PRIORITY_SCRATCH);
3693 if (!compute_scratch_bo)
3694 goto fail;
3695
3696 } else
3697 compute_scratch_bo = queue->compute_scratch_bo;
3698
3699 if (esgs_ring_size > queue->esgs_ring_size) {
3700 esgs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3701 esgs_ring_size,
3702 4096,
3703 RADEON_DOMAIN_VRAM,
3704 ring_bo_flags,
3705 RADV_BO_PRIORITY_SCRATCH);
3706 if (!esgs_ring_bo)
3707 goto fail;
3708 } else {
3709 esgs_ring_bo = queue->esgs_ring_bo;
3710 esgs_ring_size = queue->esgs_ring_size;
3711 }
3712
3713 if (gsvs_ring_size > queue->gsvs_ring_size) {
3714 gsvs_ring_bo = queue->device->ws->buffer_create(queue->device->ws,
3715 gsvs_ring_size,
3716 4096,
3717 RADEON_DOMAIN_VRAM,
3718 ring_bo_flags,
3719 RADV_BO_PRIORITY_SCRATCH);
3720 if (!gsvs_ring_bo)
3721 goto fail;
3722 } else {
3723 gsvs_ring_bo = queue->gsvs_ring_bo;
3724 gsvs_ring_size = queue->gsvs_ring_size;
3725 }
3726
3727 if (add_tess_rings) {
3728 tess_rings_bo = queue->device->ws->buffer_create(queue->device->ws,
3729 tess_offchip_ring_offset + tess_offchip_ring_size,
3730 256,
3731 RADEON_DOMAIN_VRAM,
3732 ring_bo_flags,
3733 RADV_BO_PRIORITY_SCRATCH);
3734 if (!tess_rings_bo)
3735 goto fail;
3736 } else {
3737 tess_rings_bo = queue->tess_rings_bo;
3738 }
3739
3740 if (add_gds) {
3741 assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3742
3743 /* 4 streamout GDS counters.
3744 * We need 256B (64 dw) of GDS, otherwise streamout hangs.
3745 */
3746 gds_bo = queue->device->ws->buffer_create(queue->device->ws,
3747 256, 4,
3748 RADEON_DOMAIN_GDS,
3749 ring_bo_flags,
3750 RADV_BO_PRIORITY_SCRATCH);
3751 if (!gds_bo)
3752 goto fail;
3753 } else {
3754 gds_bo = queue->gds_bo;
3755 }
3756
3757 if (add_gds_oa) {
3758 assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3759
3760 gds_oa_bo = queue->device->ws->buffer_create(queue->device->ws,
3761 4, 1,
3762 RADEON_DOMAIN_OA,
3763 ring_bo_flags,
3764 RADV_BO_PRIORITY_SCRATCH);
3765 if (!gds_oa_bo)
3766 goto fail;
3767 } else {
3768 gds_oa_bo = queue->gds_oa_bo;
3769 }
3770
3771 if (scratch_bo != queue->scratch_bo ||
3772 esgs_ring_bo != queue->esgs_ring_bo ||
3773 gsvs_ring_bo != queue->gsvs_ring_bo ||
3774 tess_rings_bo != queue->tess_rings_bo ||
3775 add_sample_positions) {
3776 uint32_t size = 0;
3777 if (gsvs_ring_bo || esgs_ring_bo ||
3778 tess_rings_bo || add_sample_positions) {
3779 size = 112; /* scratch rsrc: 2 dwords + 2 padding dwords, then 6 ring descriptors * 4 dwords = 112 bytes */
3780 if (add_sample_positions)
3781 size += 128; /* sample positions: 64+32+16+8 = 120 bytes, padded to 128 */
3782 }
3783 else if (scratch_bo)
3784 size = 8; /* 2 dword */
3785
3786 descriptor_bo = queue->device->ws->buffer_create(queue->device->ws,
3787 size,
3788 4096,
3789 RADEON_DOMAIN_VRAM,
3790 RADEON_FLAG_CPU_ACCESS |
3791 RADEON_FLAG_NO_INTERPROCESS_SHARING |
3792 RADEON_FLAG_READ_ONLY,
3793 RADV_BO_PRIORITY_DESCRIPTOR);
3794 if (!descriptor_bo)
3795 goto fail;
3796 } else
3797 descriptor_bo = queue->descriptor_bo;
3798
3799 if (descriptor_bo != queue->descriptor_bo) {
3800 uint32_t *map = (uint32_t*)queue->device->ws->buffer_map(descriptor_bo);
3801 if (!map)
3802 goto fail;
3803
3804 if (scratch_bo) {
3805 uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
3806 uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) |
3807 S_008F04_SWIZZLE_ENABLE(1);
3808 map[0] = scratch_va;
3809 map[1] = rsrc1;
3810 }
3811
3812 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
3813 fill_geom_tess_rings(queue, map, add_sample_positions,
3814 esgs_ring_size, esgs_ring_bo,
3815 gsvs_ring_size, gsvs_ring_bo,
3816 tess_factor_ring_size,
3817 tess_offchip_ring_offset,
3818 tess_offchip_ring_size,
3819 tess_rings_bo);
3820
3821 queue->device->ws->buffer_unmap(descriptor_bo);
3822 }
3823
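/* Build the three preamble variants: dest_cs[0] flushes and invalidates
 * everything (initial_full_flush_preamble_cs), dest_cs[1] only invalidates
 * caches (initial_preamble_cs) and dest_cs[2] performs no flush at all
 * (continue_preamble_cs).
 */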
3824 for(int i = 0; i < 3; ++i) {
3825 enum rgp_flush_bits sqtt_flush_bits = 0;
3826 struct radeon_cmdbuf *cs = NULL;
3827 cs = queue->device->ws->cs_create(queue->device->ws,
3828 queue->queue_family_index ? RING_COMPUTE : RING_GFX);
3829 if (!cs)
3830 goto fail;
3831
3832 dest_cs[i] = cs;
3833
3834 if (scratch_bo)
3835 radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3836
3837 /* Emit initial configuration. */
3838 switch (queue->queue_family_index) {
3839 case RADV_QUEUE_GENERAL:
3840 radv_init_graphics_state(cs, queue);
3841 break;
3842 case RADV_QUEUE_COMPUTE:
3843 radv_init_compute_state(cs, queue);
3844 break;
3845 case RADV_QUEUE_TRANSFER:
3846 break;
3847 }
3848
3849 if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
3850 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3851 radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
3852
3853 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3854 radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
3855 }
3856
3857 radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size,
3858 gsvs_ring_bo, gsvs_ring_size);
3859 radv_emit_tess_factor_ring(queue, cs, hs_offchip_param,
3860 tess_factor_ring_size, tess_rings_bo);
3861 radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
3862 radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave,
3863 compute_scratch_waves, compute_scratch_bo);
3864 radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave,
3865 scratch_waves, scratch_bo);
3866 radv_emit_trap_handler(queue, cs, queue->device->tma_bo);
3867
3868 if (gds_bo)
3869 radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
3870 if (gds_oa_bo)
3871 radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
3872
3873 if (queue->device->trace_bo)
3874 radv_cs_add_buffer(queue->device->ws, cs, queue->device->trace_bo);
3875
3876 if (queue->device->border_color_data.bo)
3877 radv_cs_add_buffer(queue->device->ws, cs,
3878 queue->device->border_color_data.bo);
3879
3880 if (i == 0) {
3881 si_cs_emit_cache_flush(cs,
3882 queue->device->physical_device->rad_info.chip_class,
3883 NULL, 0,
3884 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
3885 queue->device->physical_device->rad_info.chip_class >= GFX7,
3886 (queue->queue_family_index == RADV_QUEUE_COMPUTE ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
3887 RADV_CMD_FLAG_INV_ICACHE |
3888 RADV_CMD_FLAG_INV_SCACHE |
3889 RADV_CMD_FLAG_INV_VCACHE |
3890 RADV_CMD_FLAG_INV_L2 |
3891 RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
3892 } else if (i == 1) {
3893 si_cs_emit_cache_flush(cs,
3894 queue->device->physical_device->rad_info.chip_class,
3895 NULL, 0,
3896 queue->queue_family_index == RADV_QUEUE_COMPUTE &&
3897 queue->device->physical_device->rad_info.chip_class >= GFX7,
3898 RADV_CMD_FLAG_INV_ICACHE |
3899 RADV_CMD_FLAG_INV_SCACHE |
3900 RADV_CMD_FLAG_INV_VCACHE |
3901 RADV_CMD_FLAG_INV_L2 |
3902 RADV_CMD_FLAG_START_PIPELINE_STATS, &sqtt_flush_bits, 0);
3903 }
3904
3905 if (queue->device->ws->cs_finalize(cs) != VK_SUCCESS)
3906 goto fail;
3907 }
3908
3909 if (queue->initial_full_flush_preamble_cs)
3910 queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
3911
3912 if (queue->initial_preamble_cs)
3913 queue->device->ws->cs_destroy(queue->initial_preamble_cs);
3914
3915 if (queue->continue_preamble_cs)
3916 queue->device->ws->cs_destroy(queue->continue_preamble_cs);
3917
3918 queue->initial_full_flush_preamble_cs = dest_cs[0];
3919 queue->initial_preamble_cs = dest_cs[1];
3920 queue->continue_preamble_cs = dest_cs[2];
3921
3922 if (scratch_bo != queue->scratch_bo) {
3923 if (queue->scratch_bo)
3924 queue->device->ws->buffer_destroy(queue->scratch_bo);
3925 queue->scratch_bo = scratch_bo;
3926 }
3927 queue->scratch_size_per_wave = scratch_size_per_wave;
3928 queue->scratch_waves = scratch_waves;
3929
3930 if (compute_scratch_bo != queue->compute_scratch_bo) {
3931 if (queue->compute_scratch_bo)
3932 queue->device->ws->buffer_destroy(queue->compute_scratch_bo);
3933 queue->compute_scratch_bo = compute_scratch_bo;
3934 }
3935 queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
3936 queue->compute_scratch_waves = compute_scratch_waves;
3937
3938 if (esgs_ring_bo != queue->esgs_ring_bo) {
3939 if (queue->esgs_ring_bo)
3940 queue->device->ws->buffer_destroy(queue->esgs_ring_bo);
3941 queue->esgs_ring_bo = esgs_ring_bo;
3942 queue->esgs_ring_size = esgs_ring_size;
3943 }
3944
3945 if (gsvs_ring_bo != queue->gsvs_ring_bo) {
3946 if (queue->gsvs_ring_bo)
3947 queue->device->ws->buffer_destroy(queue->gsvs_ring_bo);
3948 queue->gsvs_ring_bo = gsvs_ring_bo;
3949 queue->gsvs_ring_size = gsvs_ring_size;
3950 }
3951
3952 if (tess_rings_bo != queue->tess_rings_bo) {
3953 queue->tess_rings_bo = tess_rings_bo;
3954 queue->has_tess_rings = true;
3955 }
3956
3957 if (gds_bo != queue->gds_bo) {
3958 queue->gds_bo = gds_bo;
3959 queue->has_gds = true;
3960 }
3961
3962 if (gds_oa_bo != queue->gds_oa_bo) {
3963 queue->gds_oa_bo = gds_oa_bo;
3964 queue->has_gds_oa = true;
3965 }
3966
3967 if (descriptor_bo != queue->descriptor_bo) {
3968 if (queue->descriptor_bo)
3969 queue->device->ws->buffer_destroy(queue->descriptor_bo);
3970
3971 queue->descriptor_bo = descriptor_bo;
3972 }
3973
3974 if (add_sample_positions)
3975 queue->has_sample_positions = true;
3976
3977 *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3978 *initial_preamble_cs = queue->initial_preamble_cs;
3979 *continue_preamble_cs = queue->continue_preamble_cs;
3980 if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
3981 *continue_preamble_cs = NULL;
3982 return VK_SUCCESS;
3983 fail:
3984 for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
3985 if (dest_cs[i])
3986 queue->device->ws->cs_destroy(dest_cs[i]);
3987 if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
3988 queue->device->ws->buffer_destroy(descriptor_bo);
3989 if (scratch_bo && scratch_bo != queue->scratch_bo)
3990 queue->device->ws->buffer_destroy(scratch_bo);
3991 if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
3992 queue->device->ws->buffer_destroy(compute_scratch_bo);
3993 if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
3994 queue->device->ws->buffer_destroy(esgs_ring_bo);
3995 if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
3996 queue->device->ws->buffer_destroy(gsvs_ring_bo);
3997 if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
3998 queue->device->ws->buffer_destroy(tess_rings_bo);
3999 if (gds_bo && gds_bo != queue->gds_bo)
4000 queue->device->ws->buffer_destroy(gds_bo);
4001 if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
4002 queue->device->ws->buffer_destroy(gds_oa_bo);
4003
4004 return vk_error(queue->device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
4005 }
4006
4007 static VkResult radv_alloc_sem_counts(struct radv_device *device,
4008 struct radv_winsys_sem_counts *counts,
4009 int num_sems,
4010 struct radv_semaphore_part **sems,
4011 const uint64_t *timeline_values,
4012 VkFence _fence,
4013 bool is_signal)
4014 {
4015 int syncobj_idx = 0, non_reset_idx = 0, sem_idx = 0, timeline_idx = 0;
4016
4017 if (num_sems == 0 && _fence == VK_NULL_HANDLE)
4018 return VK_SUCCESS;
4019
4020 for (uint32_t i = 0; i < num_sems; i++) {
4021 switch(sems[i]->kind) {
4022 case RADV_SEMAPHORE_SYNCOBJ:
4023 counts->syncobj_count++;
4024 counts->syncobj_reset_count++;
4025 break;
4026 case RADV_SEMAPHORE_WINSYS:
4027 counts->sem_count++;
4028 break;
4029 case RADV_SEMAPHORE_NONE:
4030 break;
4031 case RADV_SEMAPHORE_TIMELINE:
4032 counts->syncobj_count++;
4033 break;
4034 case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4035 counts->timeline_syncobj_count++;
4036 break;
4037 }
4038 }
4039
4040 if (_fence != VK_NULL_HANDLE) {
4041 RADV_FROM_HANDLE(radv_fence, fence, _fence);
4042
4043 struct radv_fence_part *part =
4044 fence->temporary.kind != RADV_FENCE_NONE ?
4045 &fence->temporary : &fence->permanent;
4046 if (part->kind == RADV_FENCE_SYNCOBJ)
4047 counts->syncobj_count++;
4048 }
4049
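/* The timeline points and the syncobj handles share a single allocation:
 * timeline_syncobj_count 64-bit points come first, followed by the 32-bit
 * syncobj handles for both the plain and the timeline syncobjs.
 */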
4050 if (counts->syncobj_count || counts->timeline_syncobj_count) {
4051 counts->points = (uint64_t *)malloc(
4052 sizeof(*counts->syncobj) * counts->syncobj_count +
4053 (sizeof(*counts->syncobj) + sizeof(*counts->points)) * counts->timeline_syncobj_count);
4054 if (!counts->points)
4055 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4056 counts->syncobj = (uint32_t*)(counts->points + counts->timeline_syncobj_count);
4057 }
4058
4059 if (counts->sem_count) {
4060 counts->sem = (struct radeon_winsys_sem **)malloc(sizeof(struct radeon_winsys_sem *) * counts->sem_count);
4061 if (!counts->sem) {
4062 free(counts->syncobj);
4063 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
4064 }
4065 }
4066
4067 non_reset_idx = counts->syncobj_reset_count;
4068
4069 for (uint32_t i = 0; i < num_sems; i++) {
4070 switch(sems[i]->kind) {
4071 case RADV_SEMAPHORE_NONE:
4072 unreachable("Empty semaphore");
4073 break;
4074 case RADV_SEMAPHORE_SYNCOBJ:
4075 counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
4076 break;
4077 case RADV_SEMAPHORE_WINSYS:
4078 counts->sem[sem_idx++] = sems[i]->ws_sem;
4079 break;
4080 case RADV_SEMAPHORE_TIMELINE: {
4081 pthread_mutex_lock(&sems[i]->timeline.mutex);
4082 struct radv_timeline_point *point = NULL;
4083 if (is_signal) {
4084 point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
4085 } else {
4086 point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline, timeline_values[i]);
4087 }
4088
4089 pthread_mutex_unlock(&sems[i]->timeline.mutex);
4090
4091 if (point) {
4092 counts->syncobj[non_reset_idx++] = point->syncobj;
4093 } else {
4094 /* Explicitly remove the semaphore so we don't accidentally
4095 * find (and finalize) a point for it post-submit. */
4096 sems[i] = NULL;
4097 }
4098 break;
4099 }
4100 case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4101 counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
4102 counts->points[timeline_idx] = timeline_values[i];
4103 ++timeline_idx;
4104 break;
4105 }
4106 }
4107
4108 if (_fence != VK_NULL_HANDLE) {
4109 RADV_FROM_HANDLE(radv_fence, fence, _fence);
4110
4111 struct radv_fence_part *part =
4112 fence->temporary.kind != RADV_FENCE_NONE ?
4113 &fence->temporary : &fence->permanent;
4114 if (part->kind == RADV_FENCE_SYNCOBJ)
4115 counts->syncobj[non_reset_idx++] = part->syncobj;
4116 }
4117
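/* Timeline semaphores for which no point was found were dropped above
 * (sems[i] == NULL), so the final syncobj count can be smaller than the
 * initial estimate.
 */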
4118 assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
4119 counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
4120
4121 return VK_SUCCESS;
4122 }
4123
4124 static void
4125 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
4126 {
4127 free(sem_info->wait.points);
4128 free(sem_info->wait.sem);
4129 free(sem_info->signal.points);
4130 free(sem_info->signal.sem);
4131 }
4132
4133
4134 static void radv_free_temp_syncobjs(struct radv_device *device,
4135 int num_sems,
4136 struct radv_semaphore_part *sems)
4137 {
4138 for (uint32_t i = 0; i < num_sems; i++) {
4139 radv_destroy_semaphore_part(device, sems + i);
4140 }
4141 }
4142
4143 static VkResult
4144 radv_alloc_sem_info(struct radv_device *device,
4145 struct radv_winsys_sem_info *sem_info,
4146 int num_wait_sems,
4147 struct radv_semaphore_part **wait_sems,
4148 const uint64_t *wait_values,
4149 int num_signal_sems,
4150 struct radv_semaphore_part **signal_sems,
4151 const uint64_t *signal_values,
4152 VkFence fence)
4153 {
4154 VkResult ret;
4155 memset(sem_info, 0, sizeof(*sem_info));
4156
4157 ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values, VK_NULL_HANDLE, false);
4158 if (ret)
4159 return ret;
4160 ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems, signal_values, fence, true);
4161 if (ret)
4162 radv_free_sem_info(sem_info);
4163
4164 /* caller can override these */
4165 sem_info->cs_emit_wait = true;
4166 sem_info->cs_emit_signal = true;
4167 return ret;
4168 }
4169
4170 static void
4171 radv_finalize_timelines(struct radv_device *device,
4172 uint32_t num_wait_sems,
4173 struct radv_semaphore_part **wait_sems,
4174 const uint64_t *wait_values,
4175 uint32_t num_signal_sems,
4176 struct radv_semaphore_part **signal_sems,
4177 const uint64_t *signal_values,
4178 struct list_head *processing_list)
4179 {
4180 for (uint32_t i = 0; i < num_wait_sems; ++i) {
4181 if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4182 pthread_mutex_lock(&wait_sems[i]->timeline.mutex);
4183 struct radv_timeline_point *point =
4184 radv_timeline_find_point_at_least_locked(device, &wait_sems[i]->timeline, wait_values[i]);
4185 point->wait_count -= 2;
4186 pthread_mutex_unlock(&wait_sems[i]->timeline.mutex);
4187 }
4188 }
4189 for (uint32_t i = 0; i < num_signal_sems; ++i) {
4190 if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4191 pthread_mutex_lock(&signal_sems[i]->timeline.mutex);
4192 struct radv_timeline_point *point =
4193 radv_timeline_find_point_at_least_locked(device, &signal_sems[i]->timeline, signal_values[i]);
4194 signal_sems[i]->timeline.highest_submitted =
4195 MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
4196 point->wait_count -= 2;
4197 radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
4198 pthread_mutex_unlock(&signal_sems[i]->timeline.mutex);
4199 } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
4200 signal_sems[i]->timeline_syncobj.max_point =
4201 MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
4202 }
4203 }
4204 }
4205
4206 static VkResult
4207 radv_sparse_buffer_bind_memory(struct radv_device *device,
4208 const VkSparseBufferMemoryBindInfo *bind)
4209 {
4210 RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4211 VkResult result;
4212
4213 for (uint32_t i = 0; i < bind->bindCount; ++i) {
4214 struct radv_device_memory *mem = NULL;
4215
4216 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4217 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4218
4219 result = device->ws->buffer_virtual_bind(buffer->bo,
4220 bind->pBinds[i].resourceOffset,
4221 bind->pBinds[i].size,
4222 mem ? mem->bo : NULL,
4223 bind->pBinds[i].memoryOffset);
4224 if (result != VK_SUCCESS)
4225 return result;
4226 }
4227
4228 return VK_SUCCESS;
4229 }
4230
4231 static VkResult
4232 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4233 const VkSparseImageOpaqueMemoryBindInfo *bind)
4234 {
4235 RADV_FROM_HANDLE(radv_image, image, bind->image);
4236 VkResult result;
4237
4238 for (uint32_t i = 0; i < bind->bindCount; ++i) {
4239 struct radv_device_memory *mem = NULL;
4240
4241 if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4242 mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4243
4244 result = device->ws->buffer_virtual_bind(image->bo,
4245 bind->pBinds[i].resourceOffset,
4246 bind->pBinds[i].size,
4247 mem ? mem->bo : NULL,
4248 bind->pBinds[i].memoryOffset);
4249 if (result != VK_SUCCESS)
4250 return result;
4251 }
4252
4253 return VK_SUCCESS;
4254 }
4255
4256 static VkResult
4257 radv_get_preambles(struct radv_queue *queue,
4258 const VkCommandBuffer *cmd_buffers,
4259 uint32_t cmd_buffer_count,
4260 struct radeon_cmdbuf **initial_full_flush_preamble_cs,
4261 struct radeon_cmdbuf **initial_preamble_cs,
4262 struct radeon_cmdbuf **continue_preamble_cs)
4263 {
4264 uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
4265 uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
4266 uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
4267 bool tess_rings_needed = false;
4268 bool gds_needed = false;
4269 bool gds_oa_needed = false;
4270 bool sample_positions_needed = false;
4271
4272 for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4273 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer,
4274 cmd_buffers[j]);
4275
4276 scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4277 waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
4278 compute_scratch_size_per_wave = MAX2(compute_scratch_size_per_wave,
4279 cmd_buffer->compute_scratch_size_per_wave_needed);
4280 compute_waves_wanted = MAX2(compute_waves_wanted,
4281 cmd_buffer->compute_scratch_waves_wanted);
4282 esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4283 gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4284 tess_rings_needed |= cmd_buffer->tess_rings_needed;
4285 gds_needed |= cmd_buffer->gds_needed;
4286 gds_oa_needed |= cmd_buffer->gds_oa_needed;
4287 sample_positions_needed |= cmd_buffer->sample_positions_needed;
4288 }
4289
4290 return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
4291 compute_scratch_size_per_wave, compute_waves_wanted,
4292 esgs_ring_size, gsvs_ring_size, tess_rings_needed,
4293 gds_needed, gds_oa_needed, sample_positions_needed,
4294 initial_full_flush_preamble_cs,
4295 initial_preamble_cs, continue_preamble_cs);
4296 }
4297
4298 struct radv_deferred_queue_submission {
4299 struct radv_queue *queue;
4300 VkCommandBuffer *cmd_buffers;
4301 uint32_t cmd_buffer_count;
4302
4303 /* Sparse bindings that happen on a queue. */
4304 VkSparseBufferMemoryBindInfo *buffer_binds;
4305 uint32_t buffer_bind_count;
4306 VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4307 uint32_t image_opaque_bind_count;
4308
4309 bool flush_caches;
4310 VkPipelineStageFlags wait_dst_stage_mask;
4311 struct radv_semaphore_part **wait_semaphores;
4312 uint32_t wait_semaphore_count;
4313 struct radv_semaphore_part **signal_semaphores;
4314 uint32_t signal_semaphore_count;
4315 VkFence fence;
4316
4317 uint64_t *wait_values;
4318 uint64_t *signal_values;
4319
4320 struct radv_semaphore_part *temporary_semaphore_parts;
4321 uint32_t temporary_semaphore_part_count;
4322
4323 struct list_head queue_pending_list;
4324 uint32_t submission_wait_count;
4325 struct radv_timeline_waiter *wait_nodes;
4326
4327 struct list_head processing_list;
4328 };

struct radv_queue_submission {
   const VkCommandBuffer *cmd_buffers;
   uint32_t cmd_buffer_count;

   /* Sparse bindings that happen on a queue. */
   const VkSparseBufferMemoryBindInfo *buffer_binds;
   uint32_t buffer_bind_count;
   const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
   uint32_t image_opaque_bind_count;

   bool flush_caches;
   VkPipelineStageFlags wait_dst_stage_mask;
   const VkSemaphore *wait_semaphores;
   uint32_t wait_semaphore_count;
   const VkSemaphore *signal_semaphores;
   uint32_t signal_semaphore_count;
   VkFence fence;

   const uint64_t *wait_values;
   uint32_t wait_value_count;
   const uint64_t *signal_values;
   uint32_t signal_value_count;
};

static VkResult
radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
                              uint32_t decrement,
                              struct list_head *processing_list);

static VkResult
radv_create_deferred_submission(struct radv_queue *queue,
                                const struct radv_queue_submission *submission,
                                struct radv_deferred_queue_submission **out)
{
   struct radv_deferred_queue_submission *deferred = NULL;
   size_t size = sizeof(struct radv_deferred_queue_submission);

   uint32_t temporary_count = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
         ++temporary_count;
   }

   size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
   size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
   size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
   size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += temporary_count * sizeof(struct radv_semaphore_part);
   size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += submission->wait_value_count * sizeof(uint64_t);
   size += submission->signal_value_count * sizeof(uint64_t);
   size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);

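   /* Everything is carved out of a single calloc'd block: the struct itself,
    * followed by the command buffer, bind, semaphore, value and waiter arrays,
    * in the same order as the size computation above. Each pointer below is
    * set to the end of the previous array. */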
   deferred = calloc(1, size);
   if (!deferred)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   deferred->queue = queue;

   deferred->cmd_buffers = (void*)(deferred + 1);
   deferred->cmd_buffer_count = submission->cmd_buffer_count;
   if (submission->cmd_buffer_count) {
      memcpy(deferred->cmd_buffers, submission->cmd_buffers,
             submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
   }

   deferred->buffer_binds = (void*)(deferred->cmd_buffers + submission->cmd_buffer_count);
   deferred->buffer_bind_count = submission->buffer_bind_count;
   if (submission->buffer_bind_count) {
      memcpy(deferred->buffer_binds, submission->buffer_binds,
             submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
   }

   deferred->image_opaque_binds = (void*)(deferred->buffer_binds + submission->buffer_bind_count);
   deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
   if (submission->image_opaque_bind_count) {
      memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
             submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
   }

   deferred->flush_caches = submission->flush_caches;
   deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;

   deferred->wait_semaphores = (void*)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
   deferred->wait_semaphore_count = submission->wait_semaphore_count;

   deferred->signal_semaphores = (void*)(deferred->wait_semaphores + deferred->wait_semaphore_count);
   deferred->signal_semaphore_count = submission->signal_semaphore_count;

   deferred->fence = submission->fence;

   deferred->temporary_semaphore_parts = (void*)(deferred->signal_semaphores + deferred->signal_semaphore_count);
   deferred->temporary_semaphore_part_count = temporary_count;

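   /* Temporary semaphore payloads are consumed by this submission: the part
    * is moved into the deferred copy and the semaphore reverts to its
    * permanent payload, matching the temporary-import semantics of the spec. */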
   uint32_t temporary_idx = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
         deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
         semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
         ++temporary_idx;
      } else
         deferred->wait_semaphores[i] = &semaphore->permanent;
   }

   for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->signal_semaphores[i] = &semaphore->temporary;
      } else {
         deferred->signal_semaphores[i] = &semaphore->permanent;
      }
   }

   deferred->wait_values = (void*)(deferred->temporary_semaphore_parts + temporary_count);
   if (submission->wait_value_count) {
      memcpy(deferred->wait_values, submission->wait_values, submission->wait_value_count * sizeof(uint64_t));
   }
   deferred->signal_values = deferred->wait_values + submission->wait_value_count;
   if (submission->signal_value_count) {
      memcpy(deferred->signal_values, submission->signal_values, submission->signal_value_count * sizeof(uint64_t));
   }

   deferred->wait_nodes = (void*)(deferred->signal_values + submission->signal_value_count);
   /* This is the worst case. radv_queue_enqueue_submission will lower it as it
    * walks the wait semaphores, but starting high ensures the submission is
    * not accidentally triggered early while wait timelines are still being
    * added. */
   deferred->submission_wait_count = 1 + submission->wait_semaphore_count;

   *out = deferred;
   return VK_SUCCESS;
}

static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
                              struct list_head *processing_list)
{
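   /* Register a waiter node on every timeline wait semaphore whose point has
    * not been submitted yet; each such node keeps submission_wait_count
    * elevated until the timeline reaches the awaited value. */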
   uint32_t wait_cnt = 0;
   struct radv_timeline_waiter *waiter = submission->wait_nodes;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         pthread_mutex_lock(&submission->wait_semaphores[i]->timeline.mutex);
         if (submission->wait_semaphores[i]->timeline.highest_submitted < submission->wait_values[i]) {
            ++wait_cnt;
            waiter->value = submission->wait_values[i];
            waiter->submission = submission;
            list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
            ++waiter;
         }
         pthread_mutex_unlock(&submission->wait_semaphores[i]->timeline.mutex);
      }
   }

   pthread_mutex_lock(&submission->queue->pending_mutex);

   bool is_first = list_is_empty(&submission->queue->pending_submissions);
   list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);

   pthread_mutex_unlock(&submission->queue->pending_mutex);

   /* If there is already a submission in the queue, it will decrement our
    * counter by 1 when it gets submitted, but if the queue was empty we have
    * to decrement ourselves as there is no previous submission. */
   uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);

   /* If decrement is zero, we no longer hold a refcounted reference to the
    * submission, so it is not safe to access it. */
   if (!decrement)
      return VK_SUCCESS;

   return radv_queue_trigger_submission(submission, decrement, processing_list);
}

static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
                                   struct list_head *processing_list)
{
   pthread_mutex_lock(&submission->queue->pending_mutex);
   list_del(&submission->queue_pending_list);

   /* Trigger the next submission in the queue. */
   if (!list_is_empty(&submission->queue->pending_submissions)) {
      struct radv_deferred_queue_submission *next_submission =
         list_first_entry(&submission->queue->pending_submissions,
                          struct radv_deferred_queue_submission,
                          queue_pending_list);
      radv_queue_trigger_submission(next_submission, 1, processing_list);
   }
   pthread_mutex_unlock(&submission->queue->pending_mutex);

   pthread_cond_broadcast(&submission->queue->device->timeline_cond);
}

static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
                           struct list_head *processing_list)
{
   RADV_FROM_HANDLE(radv_fence, fence, submission->fence);
   struct radv_queue *queue = submission->queue;
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
   struct radeon_winsys_fence *base_fence = NULL;
   bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
   bool can_patch = true;
   uint32_t advance;
   struct radv_winsys_sem_info sem_info;
   VkResult result;
   struct radeon_cmdbuf *initial_preamble_cs = NULL;
   struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
   struct radeon_cmdbuf *continue_preamble_cs = NULL;

   if (fence) {
      /* Under most circumstances, out fences won't be temporary.
       * However, the spec does allow it for opaque_fd.
       *
       * From the Vulkan 1.0.53 spec (the fence wording is analogous):
       *
       *    "If the import is temporary, the implementation must
       *    restore the semaphore to its prior permanent state after
       *    submitting the next semaphore wait operation."
       */
      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ?
         &fence->temporary : &fence->permanent;
      if (part->kind == RADV_FENCE_WINSYS)
         base_fence = part->fence;
   }

   result = radv_get_preambles(queue, submission->cmd_buffers,
                               submission->cmd_buffer_count,
                               &initial_flush_preamble_cs,
                               &initial_preamble_cs,
                               &continue_preamble_cs);
   if (result != VK_SUCCESS)
      goto fail;

   result = radv_alloc_sem_info(queue->device,
                                &sem_info,
                                submission->wait_semaphore_count,
                                submission->wait_semaphores,
                                submission->wait_values,
                                submission->signal_semaphore_count,
                                submission->signal_semaphores,
                                submission->signal_values,
                                submission->fence);
   if (result != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
      result = radv_sparse_buffer_bind_memory(queue->device,
                                              submission->buffer_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
      result = radv_sparse_image_opaque_bind_memory(queue->device,
                                                    submission->image_opaque_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

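   /* A submission without command buffers still has to reach the kernel so
    * that its fence and semaphore operations are processed; the per-family
    * empty CS is submitted in that case. */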
   if (!submission->cmd_buffer_count) {
      result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                            &queue->device->empty_cs[queue->queue_family_index],
                                            1, NULL, NULL,
                                            &sem_info, NULL,
                                            false, base_fence);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      struct radeon_cmdbuf **cs_array = malloc(sizeof(struct radeon_cmdbuf *) *
                                               (submission->cmd_buffer_count));
      if (!cs_array) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

         cs_array[j] = cmd_buffer->cs;
         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
            can_patch = false;

         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
      }

      for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
         struct radeon_cmdbuf *initial_preamble = (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
         const struct radv_winsys_bo_list *bo_list = NULL;

         advance = MIN2(max_cs_submission,
                        submission->cmd_buffer_count - j);

         if (queue->device->trace_bo)
            *queue->device->trace_id_ptr = 0;

         sem_info.cs_emit_wait = j == 0;
         sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;

         if (unlikely(queue->device->use_global_bo_list)) {
            u_rwlock_rdlock(&queue->device->bo_list.rwlock);
            bo_list = &queue->device->bo_list.list;
         }

         result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j,
                                               advance, initial_preamble, continue_preamble_cs,
                                               &sem_info, bo_list,
                                               can_patch, base_fence);

         if (unlikely(queue->device->use_global_bo_list))
            u_rwlock_rdunlock(&queue->device->bo_list.rwlock);

         if (result != VK_SUCCESS)
            goto fail;

         if (queue->device->trace_bo) {
            radv_check_gpu_hangs(queue, cs_array[j]);
         }

         if (queue->device->tma_bo) {
            radv_check_trap_handler(queue);
         }
      }

      free(cs_array);
   }

   radv_free_temp_syncobjs(queue->device,
                           submission->temporary_semaphore_part_count,
                           submission->temporary_semaphore_parts);
   radv_finalize_timelines(queue->device,
                           submission->wait_semaphore_count,
                           submission->wait_semaphores,
                           submission->wait_values,
                           submission->signal_semaphore_count,
                           submission->signal_semaphores,
                           submission->signal_values,
                           processing_list);
   /* This has to happen after timeline finalization to make sure the
    * condition variable is only triggered when timelines and queue have
    * been updated. */
   radv_queue_submission_update_queue(submission, processing_list);
   radv_free_sem_info(&sem_info);
   free(submission);
   return VK_SUCCESS;

fail:
   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
      /* When something bad happened during the submission, such as
       * an out of memory issue, it might be hard to recover from
       * this inconsistent state. To avoid this sort of problem, we
       * assume that we are in a really bad situation and return
       * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
       * to submit the same job again to this device.
       */
      result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
   }

   radv_free_temp_syncobjs(queue->device,
                           submission->temporary_semaphore_part_count,
                           submission->temporary_semaphore_parts);
   free(submission);
   return result;
}

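/* Drain the processing list. Submissions executed here can signal timeline
 * points that unblock further deferred submissions; those are appended to the
 * same list (via radv_finalize_timelines) and picked up by this loop. */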
static VkResult
radv_process_submissions(struct list_head *processing_list)
{
   while (!list_is_empty(processing_list)) {
      struct radv_deferred_queue_submission *submission =
         list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
      list_del(&submission->processing_list);

      VkResult result = radv_queue_submit_deferred(submission, processing_list);
      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
                                        uint64_t timeout)
{
   struct radv_device *device = submission->queue->device;
   uint32_t syncobj_count = 0;
   uint32_t syncobj_idx = 0;

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;
      ++syncobj_count;
   }

   if (!syncobj_count)
      return VK_SUCCESS;

   uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
   if (!points)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t *syncobj = (uint32_t*)(points + syncobj_count);

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;

      syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
      points[syncobj_idx] = submission->wait_values[i];
      ++syncobj_idx;
   }

   bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true, true, timeout);

   free(points);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

static void* radv_queue_submission_thread_run(void *q)
{
   struct radv_queue *queue = q;

   pthread_mutex_lock(&queue->thread_mutex);
   while (!p_atomic_read(&queue->thread_exit)) {
      struct radv_deferred_queue_submission *submission = queue->thread_submission;
      struct list_head processing_list;
      VkResult result = VK_SUCCESS;
      if (!submission) {
         pthread_cond_wait(&queue->thread_cond, &queue->thread_mutex);
         continue;
      }
      pthread_mutex_unlock(&queue->thread_mutex);

      /* Wait at most 5 seconds so we have a chance to notice shutdown when
       * a semaphore never gets signaled. If it takes longer we just retry
       * the wait next iteration. */
      result = wait_for_submission_timelines_available(submission,
                                                       radv_get_absolute_timeout(5000000000));
      if (result != VK_SUCCESS) {
         pthread_mutex_lock(&queue->thread_mutex);
         continue;
      }

      /* The lock isn't held but nobody will add one until we finish
       * the current submission. */
      p_atomic_set(&queue->thread_submission, NULL);

      list_inithead(&processing_list);
      list_addtail(&submission->processing_list, &processing_list);
      result = radv_process_submissions(&processing_list);

      pthread_mutex_lock(&queue->thread_mutex);
   }
   pthread_mutex_unlock(&queue->thread_mutex);
   return NULL;
}

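/* Called when `decrement` of the submission's outstanding dependencies have
 * been satisfied. Once the wait count drops to zero the submission is either
 * queued for processing inline (if all timeline waits are already
 * satisfiable) or handed to the lazily started per-queue submission thread. */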
static VkResult
radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
                              uint32_t decrement,
                              struct list_head *processing_list)
{
   struct radv_queue *queue = submission->queue;
   int ret;
   if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
      return VK_SUCCESS;

   if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) == VK_SUCCESS) {
      list_addtail(&submission->processing_list, processing_list);
      return VK_SUCCESS;
   }

   pthread_mutex_lock(&queue->thread_mutex);

   /* A submission can only be ready for the thread if it doesn't have
    * any predecessors in the same queue, so there can only be one such
    * submission at a time. */
   assert(queue->thread_submission == NULL);

   /* Only start the thread on demand to save resources for the many games
    * which only use binary semaphores. */
   if (!queue->thread_running) {
      ret = pthread_create(&queue->submission_thread, NULL,
                           radv_queue_submission_thread_run, queue);
      if (ret) {
         pthread_mutex_unlock(&queue->thread_mutex);
         return vk_errorf(queue->device->instance,
                          VK_ERROR_DEVICE_LOST,
                          "Failed to start submission thread");
      }
      queue->thread_running = true;
   }

   queue->thread_submission = submission;
   pthread_mutex_unlock(&queue->thread_mutex);

   pthread_cond_signal(&queue->thread_cond);
   return VK_SUCCESS;
}

static VkResult radv_queue_submit(struct radv_queue *queue,
                                  const struct radv_queue_submission *submission)
{
   struct radv_deferred_queue_submission *deferred = NULL;

   VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
   if (result != VK_SUCCESS)
      return result;

   struct list_head processing_list;
   list_inithead(&processing_list);

   result = radv_queue_enqueue_submission(deferred, &processing_list);
   if (result != VK_SUCCESS) {
      /* If anything is in the list we leak. */
      assert(list_is_empty(&processing_list));
      return result;
   }
   return radv_process_submissions(&processing_list);
}

bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   struct radv_winsys_sem_info sem_info;
   VkResult result;

   result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0,
                                0, NULL, VK_NULL_HANDLE);
   if (result != VK_SUCCESS)
      return false;

   result = queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1,
                                         NULL, NULL, &sem_info, NULL,
                                         false, NULL);
   radv_free_sem_info(&sem_info);
   if (result != VK_SUCCESS)
      return false;

   return true;
}

/* Signals fence as soon as all the work currently put on queue is done. */
static VkResult radv_signal_fence(struct radv_queue *queue,
                                  VkFence fence)
{
   return radv_queue_submit(queue, &(struct radv_queue_submission) {
                               .fence = fence
                            });
}

static bool radv_submit_has_effects(const VkSubmitInfo *info)
{
   return info->commandBufferCount ||
          info->waitSemaphoreCount ||
          info->signalSemaphoreCount;
}

VkResult radv_QueueSubmit(
   VkQueue _queue,
   uint32_t submitCount,
   const VkSubmitInfo* pSubmits,
   VkFence fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   VkResult result;
   uint32_t fence_idx = 0;
   bool flushed_caches = false;

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

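   /* Attach the fence to the last submit that has any effect, so it only
    * signals once all prior work is submitted. With no submits at all, the
    * fence is instead signaled through an empty submission at the end. */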
   if (fence != VK_NULL_HANDLE) {
      for (uint32_t i = 0; i < submitCount; ++i)
         if (radv_submit_has_effects(pSubmits + i))
            fence_idx = i;
   } else
      fence_idx = UINT32_MAX;

   for (uint32_t i = 0; i < submitCount; i++) {
      if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
         continue;

      VkPipelineStageFlags wait_dst_stage_mask = 0;
      for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
         wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
      }

      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

      result = radv_queue_submit(queue, &(struct radv_queue_submission) {
                                    .cmd_buffers = pSubmits[i].pCommandBuffers,
                                    .cmd_buffer_count = pSubmits[i].commandBufferCount,
                                    .wait_dst_stage_mask = wait_dst_stage_mask,
                                    .flush_caches = !flushed_caches,
                                    .wait_semaphores = pSubmits[i].pWaitSemaphores,
                                    .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
                                    .signal_semaphores = pSubmits[i].pSignalSemaphores,
                                    .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
                                    .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
                                    .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
                                    .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
                                    .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
                                    .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
                                 });
      if (result != VK_SUCCESS)
         return result;

      flushed_caches = true;
   }

   if (fence != VK_NULL_HANDLE && !submitCount) {
      result = radv_signal_fence(queue, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static const char *
radv_get_queue_family_name(struct radv_queue *queue)
{
   switch (queue->queue_family_index) {
   case RADV_QUEUE_GENERAL:
      return "graphics";
   case RADV_QUEUE_COMPUTE:
      return "compute";
   case RADV_QUEUE_TRANSFER:
      return "transfer";
   default:
      unreachable("Unknown queue family");
   }
}

VkResult radv_QueueWaitIdle(
   VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   pthread_mutex_lock(&queue->pending_mutex);
   while (!list_is_empty(&queue->pending_submissions)) {
      pthread_cond_wait(&queue->device->timeline_cond, &queue->pending_mutex);
   }
   pthread_mutex_unlock(&queue->pending_mutex);

   if (!queue->device->ws->ctx_wait_idle(queue->hw_ctx,
                                         radv_queue_family_to_ring(queue->queue_family_index),
                                         queue->queue_idx)) {
      return radv_device_set_lost(queue->device,
                                  "Failed to wait for a '%s' queue "
                                  "to be idle. GPU hang?",
                                  radv_get_queue_family_name(queue));
   }

   return VK_SUCCESS;
}

VkResult radv_DeviceWaitIdle(
   VkDevice _device)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++) {
         VkResult result =
            radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));

         if (result != VK_SUCCESS)
            return result;
      }
   }
   return VK_SUCCESS;
}

VkResult radv_EnumerateInstanceExtensionProperties(
   const char* pLayerName,
   uint32_t* pPropertyCount,
   VkExtensionProperties* pProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties,
                          pPropertyCount);

   for (int i = 0; i < RADV_INSTANCE_EXTENSION_COUNT; i++) {
      if (radv_instance_extensions_supported.extensions[i]) {
         vk_outarray_append_typed(VkExtensionProperties, &out, prop) {
            *prop = radv_instance_extensions[i];
         }
      }
   }

   return vk_outarray_status(&out);
}

VkResult radv_EnumerateDeviceExtensionProperties(
   VkPhysicalDevice physicalDevice,
   const char* pLayerName,
   uint32_t* pPropertyCount,
   VkExtensionProperties* pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
   VK_OUTARRAY_MAKE_TYPED(VkExtensionProperties, out, pProperties,
                          pPropertyCount);

   for (int i = 0; i < RADV_DEVICE_EXTENSION_COUNT; i++) {
      if (device->supported_extensions.extensions[i]) {
         vk_outarray_append_typed(VkExtensionProperties, &out, prop) {
            *prop = radv_device_extensions[i];
         }
      }
   }

   return vk_outarray_status(&out);
}
radv_GetInstanceProcAddr(VkInstance _instance,const char * pName)5056 PFN_vkVoidFunction radv_GetInstanceProcAddr(
5057 VkInstance _instance,
5058 const char* pName)
5059 {
5060 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5061
5062 /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
5063 * when we have to return valid function pointers, NULL, or it's left
5064 * undefined. See the table for exact details.
5065 */
5066 if (pName == NULL)
5067 return NULL;
5068
5069 #define LOOKUP_RADV_ENTRYPOINT(entrypoint) \
5070 if (strcmp(pName, "vk" #entrypoint) == 0) \
5071 return (PFN_vkVoidFunction)radv_##entrypoint
5072
5073 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
5074 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
5075 LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
5076 LOOKUP_RADV_ENTRYPOINT(CreateInstance);
5077
5078 /* GetInstanceProcAddr() can also be called with a NULL instance.
5079 * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
5080 */
5081 LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);
5082
5083 #undef LOOKUP_RADV_ENTRYPOINT
5084
5085 if (instance == NULL)
5086 return NULL;
5087
5088 int idx = radv_get_instance_entrypoint_index(pName);
5089 if (idx >= 0)
5090 return instance->dispatch.entrypoints[idx];
5091
5092 idx = radv_get_physical_device_entrypoint_index(pName);
5093 if (idx >= 0)
5094 return instance->physical_device_dispatch.entrypoints[idx];
5095
5096 idx = radv_get_device_entrypoint_index(pName);
5097 if (idx >= 0)
5098 return instance->device_dispatch.entrypoints[idx];
5099
5100 return NULL;
5101 }
5102
5103 /* The loader wants us to expose a second GetInstanceProcAddr function
5104 * to work around certain LD_PRELOAD issues seen in apps.
5105 */
5106 PUBLIC
5107 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
5108 VkInstance instance,
5109 const char* pName);
5110
5111 PUBLIC
vk_icdGetInstanceProcAddr(VkInstance instance,const char * pName)5112 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
5113 VkInstance instance,
5114 const char* pName)
5115 {
5116 return radv_GetInstanceProcAddr(instance, pName);
5117 }
5118
5119 PUBLIC
5120 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
5121 VkInstance _instance,
5122 const char* pName);
5123
5124 PUBLIC
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,const char * pName)5125 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetPhysicalDeviceProcAddr(
5126 VkInstance _instance,
5127 const char* pName)
5128 {
5129 RADV_FROM_HANDLE(radv_instance, instance, _instance);
5130
5131 if (!pName || !instance)
5132 return NULL;
5133
5134 int idx = radv_get_physical_device_entrypoint_index(pName);
5135 if (idx < 0)
5136 return NULL;
5137
5138 return instance->physical_device_dispatch.entrypoints[idx];
5139 }
5140
radv_GetDeviceProcAddr(VkDevice _device,const char * pName)5141 PFN_vkVoidFunction radv_GetDeviceProcAddr(
5142 VkDevice _device,
5143 const char* pName)
5144 {
5145 RADV_FROM_HANDLE(radv_device, device, _device);
5146
5147 if (!device || !pName)
5148 return NULL;
5149
5150 int idx = radv_get_device_entrypoint_index(pName);
5151 if (idx < 0)
5152 return NULL;
5153
5154 return device->dispatch.entrypoints[idx];
5155 }

bool radv_get_memory_fd(struct radv_device *device,
                        struct radv_device_memory *memory,
                        int *pFD)
{
   struct radeon_bo_metadata metadata;

   if (memory->image && memory->image->tiling != VK_IMAGE_TILING_LINEAR) {
      radv_init_metadata(device, memory->image, &metadata);
      device->ws->buffer_set_metadata(memory->bo, &metadata);
   }

   return device->ws->buffer_get_fd(device->ws, memory->bo,
                                    pFD);
}

void
radv_free_memory(struct radv_device *device,
                 const VkAllocationCallbacks* pAllocator,
                 struct radv_device_memory *mem)
{
   if (mem == NULL)
      return;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   if (mem->android_hardware_buffer)
      AHardwareBuffer_release(mem->android_hardware_buffer);
#endif

   if (mem->bo) {
      if (device->overallocation_disallowed) {
         mtx_lock(&device->overallocation_mutex);
         device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      radv_bo_list_remove(device, mem->bo);
      device->ws->buffer_destroy(mem->bo);
      mem->bo = NULL;
   }

   vk_object_base_finish(&mem->base);
   vk_free2(&device->vk.alloc, pAllocator, mem);
}

static VkResult radv_alloc_memory(struct radv_device *device,
                                  const VkMemoryAllocateInfo* pAllocateInfo,
                                  const VkAllocationCallbacks* pAllocator,
                                  VkDeviceMemory* pMem)
{
   struct radv_device_memory *mem;
   VkResult result;
   enum radeon_bo_domain domain;
   uint32_t flags = 0;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   const VkImportMemoryFdInfoKHR *import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   const VkExportMemoryAllocateInfo *export_info =
      vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
   const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
      vk_find_struct_const(pAllocateInfo->pNext,
                           IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
   const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

   if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
       !(export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   mem = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &mem->base,
                       VK_OBJECT_TYPE_DEVICE_MEMORY);

   if (wsi_info && wsi_info->implicit_sync)
      flags |= RADEON_FLAG_IMPLICIT_SYNC;

   if (dedicate_info) {
      mem->image = radv_image_from_handle(dedicate_info->image);
      mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
   } else {
      mem->image = NULL;
      mem->buffer = NULL;
   }

   float priority_float = 0.5;
   const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
      vk_find_struct_const(pAllocateInfo->pNext,
                           MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
   if (priority_ext)
      priority_float = priority_ext->priority;

   unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
                            (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));
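   /* The float priority in [0, 1] from VK_EXT_memory_priority is mapped onto
    * the integer range of application BO priorities, clamped so it stays
    * below RADV_BO_PRIORITY_APPLICATION_MAX; the values above that are
    * reserved for the driver's own buffers. */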

   mem->user_ptr = NULL;
   mem->bo = NULL;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   mem->android_hardware_buffer = NULL;
#endif

   if (ahb_import_info) {
      result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (export_info && (export_info->handleTypes & VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
      result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (import_info) {
      assert(import_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             import_info->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
      mem->bo = device->ws->buffer_from_fd(device->ws, import_info->fd,
                                           priority, NULL);
      if (!mem->bo) {
         result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
         goto fail;
      } else {
         close(import_info->fd);
      }

      if (mem->image && mem->image->plane_count == 1 &&
          !vk_format_is_depth_or_stencil(mem->image->vk_format) &&
          mem->image->info.samples == 1) {
         struct radeon_bo_metadata metadata;
         device->ws->buffer_get_metadata(mem->bo, &metadata);

         struct radv_image_create_info create_info = {
            .no_metadata_planes = true,
            .bo_metadata = &metadata
         };

         /* This gives a basic ability to import radeonsi images
          * that don't have DCC. This is not guaranteed by any
          * spec and can be removed after we support modifiers. */
         result = radv_image_create_layout(device, create_info, mem->image);
         if (result != VK_SUCCESS) {
            device->ws->buffer_destroy(mem->bo);
            goto fail;
         }
      }
   } else if (host_ptr_info) {
      assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
      mem->bo = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
                                            pAllocateInfo->allocationSize,
                                            priority);
      if (!mem->bo) {
         result = VK_ERROR_INVALID_EXTERNAL_HANDLE;
         goto fail;
      } else {
         mem->user_ptr = host_ptr_info->pHostPointer;
      }
   } else {
      uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
      uint32_t heap_index;

      heap_index = device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex].heapIndex;
      domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
      flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];

      if (!dedicate_info && !import_info && (!export_info || !export_info->handleTypes)) {
         flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
         if (device->use_global_bo_list) {
            flags |= RADEON_FLAG_PREFER_LOCAL_BO;
         }
      }

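      /* When overallocation is disallowed, keep per-heap accounting and
       * reject the allocation up front if it would exceed the heap size. */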
      if (device->overallocation_disallowed) {
         uint64_t total_size =
            device->physical_device->memory_properties.memoryHeaps[heap_index].size;

         mtx_lock(&device->overallocation_mutex);
         if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
            mtx_unlock(&device->overallocation_mutex);
            result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
            goto fail;
         }
         device->allocated_memory_size[heap_index] += alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      mem->bo = device->ws->buffer_create(device->ws, alloc_size, device->physical_device->rad_info.max_alignment,
                                          domain, flags, priority);

      if (!mem->bo) {
         if (device->overallocation_disallowed) {
            mtx_lock(&device->overallocation_mutex);
            device->allocated_memory_size[heap_index] -= alloc_size;
            mtx_unlock(&device->overallocation_mutex);
         }
         result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
         goto fail;
      }

      mem->heap_index = heap_index;
      mem->alloc_size = alloc_size;
   }

   if (!wsi_info) {
      result = radv_bo_list_add(device, mem->bo);
      if (result != VK_SUCCESS)
         goto fail;
   }

   *pMem = radv_device_memory_to_handle(mem);

   return VK_SUCCESS;

fail:
   radv_free_memory(device, pAllocator, mem);

   return result;
}

VkResult radv_AllocateMemory(
   VkDevice _device,
   const VkMemoryAllocateInfo* pAllocateInfo,
   const VkAllocationCallbacks* pAllocator,
   VkDeviceMemory* pMem)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}

void radv_FreeMemory(
   VkDevice _device,
   VkDeviceMemory _mem,
   const VkAllocationCallbacks* pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

   radv_free_memory(device, pAllocator, mem);
}

VkResult radv_MapMemory(
   VkDevice _device,
   VkDeviceMemory _memory,
   VkDeviceSize offset,
   VkDeviceSize size,
   VkMemoryMapFlags flags,
   void** ppData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->user_ptr)
      *ppData = mem->user_ptr;
   else
      *ppData = device->ws->buffer_map(mem->bo);

   if (*ppData) {
      *ppData += offset;
      return VK_SUCCESS;
   }

   return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
}

void radv_UnmapMemory(
   VkDevice _device,
   VkDeviceMemory _memory)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL)
      return;

   if (mem->user_ptr == NULL)
      device->ws->buffer_unmap(mem->bo);
}

VkResult radv_FlushMappedMemoryRanges(
   VkDevice _device,
   uint32_t memoryRangeCount,
   const VkMappedMemoryRange* pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult radv_InvalidateMappedMemoryRanges(
   VkDevice _device,
   uint32_t memoryRangeCount,
   const VkMappedMemoryRange* pMemoryRanges)
{
   return VK_SUCCESS;
}

void radv_GetBufferMemoryRequirements(
   VkDevice _device,
   VkBuffer _buffer,
   VkMemoryRequirements* pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

   pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

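   /* Sparse buffers get a page-sized (4096-byte) alignment so they can be
    * virtually bound at page granularity; other buffers only need 16 bytes. */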
   if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
      pMemoryRequirements->alignment = 4096;
   else
      pMemoryRequirements->alignment = 16;

   pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
}

void radv_GetBufferMemoryRequirements2(
   VkDevice device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   radv_GetBufferMemoryRequirements(device, pInfo->buffer,
                                    &pMemoryRequirements->memoryRequirements);
   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req =
            (VkMemoryDedicatedRequirements *) ext;
         req->requiresDedicatedAllocation = false;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

void radv_GetImageMemoryRequirements(
   VkDevice _device,
   VkImage _image,
   VkMemoryRequirements* pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, _image);

   pMemoryRequirements->memoryTypeBits = (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

   pMemoryRequirements->size = image->size;
   pMemoryRequirements->alignment = image->alignment;
}

void radv_GetImageMemoryRequirements2(
   VkDevice device,
   const VkImageMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   radv_GetImageMemoryRequirements(device, pInfo->image,
                                   &pMemoryRequirements->memoryRequirements);

   RADV_FROM_HANDLE(radv_image, image, pInfo->image);

   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req =
            (VkMemoryDedicatedRequirements *) ext;
         req->requiresDedicatedAllocation = image->shareable &&
                                            image->tiling != VK_IMAGE_TILING_LINEAR;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

void radv_GetImageSparseMemoryRequirements(
   VkDevice device,
   VkImage image,
   uint32_t* pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
   stub();
}

void radv_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t* pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   stub();
}

void radv_GetDeviceMemoryCommitment(
   VkDevice device,
   VkDeviceMemory memory,
   VkDeviceSize* pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult radv_BindBufferMemory2(VkDevice device,
                                uint32_t bindInfoCount,
                                const VkBindBufferMemoryInfo *pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);

      if (mem) {
         buffer->bo = mem->bo;
         buffer->offset = pBindInfos[i].memoryOffset;
      } else {
         buffer->bo = NULL;
      }
   }
   return VK_SUCCESS;
}

VkResult radv_BindBufferMemory(
   VkDevice device,
   VkBuffer buffer,
   VkDeviceMemory memory,
   VkDeviceSize memoryOffset)
{
   const VkBindBufferMemoryInfo info = {
      .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
      .buffer = buffer,
      .memory = memory,
      .memoryOffset = memoryOffset
   };

   return radv_BindBufferMemory2(device, 1, &info);
}

VkResult radv_BindImageMemory2(VkDevice device,
                               uint32_t bindInfoCount,
                               const VkBindImageMemoryInfo *pBindInfos)
{
   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);

      if (mem) {
         image->bo = mem->bo;
         image->offset = pBindInfos[i].memoryOffset;
      } else {
         image->bo = NULL;
         image->offset = 0;
      }
   }
   return VK_SUCCESS;
}

VkResult radv_BindImageMemory(
   VkDevice device,
   VkImage image,
   VkDeviceMemory memory,
   VkDeviceSize memoryOffset)
{
   const VkBindImageMemoryInfo info = {
      .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
      .image = image,
      .memory = memory,
      .memoryOffset = memoryOffset
   };

   return radv_BindImageMemory2(device, 1, &info);
}

static bool radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
{
   return info->bufferBindCount ||
          info->imageOpaqueBindCount ||
          info->imageBindCount ||
          info->waitSemaphoreCount ||
          info->signalSemaphoreCount;
}

VkResult radv_QueueBindSparse(
   VkQueue _queue,
   uint32_t bindInfoCount,
   const VkBindSparseInfo* pBindInfo,
   VkFence fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   VkResult result;
   uint32_t fence_idx = 0;

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   if (fence != VK_NULL_HANDLE) {
      for (uint32_t i = 0; i < bindInfoCount; ++i)
         if (radv_sparse_bind_has_effects(pBindInfo + i))
            fence_idx = i;
   } else
      fence_idx = UINT32_MAX;

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
         continue;

      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

      result = radv_queue_submit(queue, &(struct radv_queue_submission) {
                                    .buffer_binds = pBindInfo[i].pBufferBinds,
                                    .buffer_bind_count = pBindInfo[i].bufferBindCount,
                                    .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
                                    .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
                                    .wait_semaphores = pBindInfo[i].pWaitSemaphores,
                                    .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
                                    .signal_semaphores = pBindInfo[i].pSignalSemaphores,
                                    .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
                                    .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
                                    .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
                                    .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues ? timeline_info->waitSemaphoreValueCount : 0,
                                    .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
                                    .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues ? timeline_info->signalSemaphoreValueCount : 0,
                                 });

      if (result != VK_SUCCESS)
         return result;
   }

   if (fence != VK_NULL_HANDLE && !bindInfoCount) {
      result = radv_signal_fence(queue, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
radv_destroy_fence_part(struct radv_device *device,
                        struct radv_fence_part *part)
{
   switch (part->kind) {
   case RADV_FENCE_NONE:
      break;
   case RADV_FENCE_WINSYS:
      device->ws->destroy_fence(part->fence);
      break;
   case RADV_FENCE_SYNCOBJ:
      device->ws->destroy_syncobj(device->ws, part->syncobj);
      break;
   default:
      unreachable("Invalid fence type");
   }

   part->kind = RADV_FENCE_NONE;
}

static void
radv_destroy_fence(struct radv_device *device,
                   const VkAllocationCallbacks *pAllocator,
                   struct radv_fence *fence)
{
   radv_destroy_fence_part(device, &fence->temporary);
   radv_destroy_fence_part(device, &fence->permanent);

   vk_object_base_finish(&fence->base);
   vk_free2(&device->vk.alloc, pAllocator, fence);
}

VkResult radv_CreateFence(
   VkDevice _device,
   const VkFenceCreateInfo* pCreateInfo,
   const VkAllocationCallbacks* pAllocator,
   VkFence* pFence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkExportFenceCreateInfo *export =
      vk_find_struct_const(pCreateInfo->pNext, EXPORT_FENCE_CREATE_INFO);
   VkExternalFenceHandleTypeFlags handleTypes =
      export ? export->handleTypes : 0;
   struct radv_fence *fence;

   fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fence)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);

   if (device->always_use_syncobj || handleTypes) {
      fence->permanent.kind = RADV_FENCE_SYNCOBJ;

      bool create_signaled = false;
      if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
         create_signaled = true;

      int ret = device->ws->create_syncobj(device->ws, create_signaled,
                                           &fence->permanent.syncobj);
      if (ret) {
         radv_destroy_fence(device, pAllocator, fence);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      fence->permanent.kind = RADV_FENCE_WINSYS;

      fence->permanent.fence = device->ws->create_fence();
      if (!fence->permanent.fence) {
         /* radv_destroy_fence() also frees the fence; freeing it here as
          * well would be a double free. */
         radv_destroy_fence(device, pAllocator, fence);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
         device->ws->signal_fence(fence->permanent.fence);
   }

   *pFence = radv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void radv_DestroyFence(
   VkDevice _device,
   VkFence _fence,
   const VkAllocationCallbacks* pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   if (!fence)
      return;

   radv_destroy_fence(device, pAllocator, fence);
}

static bool radv_all_fences_plain_and_submitted(struct radv_device *device,
                                                uint32_t fenceCount, const VkFence *pFences)
{
   for (uint32_t i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ?
         &fence->temporary : &fence->permanent;
      if (part->kind != RADV_FENCE_WINSYS ||
          !device->ws->is_fence_waitable(part->fence))
         return false;
   }
   return true;
}

static bool radv_all_fences_syncobj(uint32_t fenceCount, const VkFence *pFences)
{
   for (uint32_t i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ?
         &fence->temporary : &fence->permanent;
      if (part->kind != RADV_FENCE_SYNCOBJ)
         return false;
   }
   return true;
}

VkResult radv_WaitForFences(
   VkDevice _device,
   uint32_t fenceCount,
   const VkFence* pFences,
   VkBool32 waitAll,
   uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   timeout = radv_get_absolute_timeout(timeout);

   if (device->always_use_syncobj &&
       radv_all_fences_syncobj(fenceCount, pFences)) {
      uint32_t *handles = malloc(sizeof(uint32_t) * fenceCount);
      if (!handles)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

      for (uint32_t i = 0; i < fenceCount; ++i) {
         RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

         struct radv_fence_part *part =
            fence->temporary.kind != RADV_FENCE_NONE ?
            &fence->temporary : &fence->permanent;

         assert(part->kind == RADV_FENCE_SYNCOBJ);
         handles[i] = part->syncobj;
      }

      bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);

      free(handles);
      return success ? VK_SUCCESS : VK_TIMEOUT;
   }

   if (!waitAll && fenceCount > 1) {
      /* Not doing this by default for waitAll, due to needing to allocate twice. */
      if (device->physical_device->rad_info.drm_minor >= 10 && radv_all_fences_plain_and_submitted(device, fenceCount, pFences)) {
         uint32_t wait_count = 0;
         struct radeon_winsys_fence **fences = malloc(sizeof(struct radeon_winsys_fence *) * fenceCount);
         if (!fences)
            return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

         for (uint32_t i = 0; i < fenceCount; ++i) {
            RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

            struct radv_fence_part *part =
               fence->temporary.kind != RADV_FENCE_NONE ?
               &fence->temporary : &fence->permanent;
            assert(part->kind == RADV_FENCE_WINSYS);

            if (device->ws->fence_wait(device->ws, part->fence, false, 0)) {
               free(fences);
               return VK_SUCCESS;
            }

            fences[wait_count++] = part->fence;
         }

         bool success = device->ws->fences_wait(device->ws, fences, wait_count,
                                                waitAll, timeout - radv_get_current_time());

         free(fences);
         return success ? VK_SUCCESS : VK_TIMEOUT;
      }

5905 while(radv_get_current_time() <= timeout) {
5906 for (uint32_t i = 0; i < fenceCount; ++i) {
5907 if (radv_GetFenceStatus(_device, pFences[i]) == VK_SUCCESS)
5908 return VK_SUCCESS;
5909 }
5910 }
5911 return VK_TIMEOUT;
5912 }
5913
5914 for (uint32_t i = 0; i < fenceCount; ++i) {
5915 RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
5916 bool expired = false;
5917
5918 struct radv_fence_part *part =
5919 fence->temporary.kind != RADV_FENCE_NONE ?
5920 &fence->temporary : &fence->permanent;
5921
5922 switch (part->kind) {
5923 case RADV_FENCE_NONE:
5924 break;
5925 case RADV_FENCE_WINSYS:
5926 if (!device->ws->is_fence_waitable(part->fence)) {
5927 while (!device->ws->is_fence_waitable(part->fence) &&
5928 radv_get_current_time() <= timeout)
5929 /* Do nothing */;
5930 }
5931
5932 expired = device->ws->fence_wait(device->ws,
5933 part->fence,
5934 true, timeout);
5935 if (!expired)
5936 return VK_TIMEOUT;
5937 break;
5938 case RADV_FENCE_SYNCOBJ:
5939 if (!device->ws->wait_syncobj(device->ws,
5940 &part->syncobj, 1, true,
5941 timeout))
5942 return VK_TIMEOUT;
5943 break;
5944 default:
5945 unreachable("Invalid fence type");
5946 }
5947 }
5948
5949 return VK_SUCCESS;
5950 }
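
/* Summary of the wait strategies above (descriptive note):
 *  1. All-syncobj fences: a single winsys wait_syncobj call covering every
 *     handle, letting the kernel do the wait-all/wait-any logic.
 *  2. Wait-any on plain, submitted winsys fences (DRM minor >= 10): one
 *     fences_wait call, preceded by a quick zero-timeout poll of each fence.
 *  3. Fallback: wait on each fence in turn (or busy-poll GetFenceStatus for
 *     the wait-any case).
 */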

VkResult radv_ResetFences(VkDevice _device,
                          uint32_t fenceCount,
                          const VkFence *pFences)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (unsigned i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      /* From the Vulkan 1.0.53 spec:
       *
       *    "If any member of pFences currently has its payload
       *    imported with temporary permanence, that fence's prior
       *    permanent payload is first restored. The remaining
       *    operations described therefore operate on the restored
       *    payload."
       */
      if (fence->temporary.kind != RADV_FENCE_NONE)
         radv_destroy_fence_part(device, &fence->temporary);

      struct radv_fence_part *part = &fence->permanent;

      switch (part->kind) {
      case RADV_FENCE_WINSYS:
         device->ws->reset_fence(part->fence);
         break;
      case RADV_FENCE_SYNCOBJ:
         device->ws->reset_syncobj(device->ws, part->syncobj);
         break;
      default:
         unreachable("Invalid fence type");
      }
   }

   return VK_SUCCESS;
}

VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ?
      &fence->temporary : &fence->permanent;

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   switch (part->kind) {
   case RADV_FENCE_NONE:
      break;
   case RADV_FENCE_WINSYS:
      if (!device->ws->fence_wait(device->ws, part->fence, false, 0))
         return VK_NOT_READY;
      break;
   case RADV_FENCE_SYNCOBJ: {
      bool success = device->ws->wait_syncobj(device->ws,
                                              &part->syncobj, 1, true, 0);
      if (!success)
         return VK_NOT_READY;
      break;
   }
   default:
      unreachable("Invalid fence type");
   }

   return VK_SUCCESS;
}


// Queue semaphore functions

static void
radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
{
   timeline->highest_signaled = value;
   timeline->highest_submitted = value;
   list_inithead(&timeline->points);
   list_inithead(&timeline->free_points);
   list_inithead(&timeline->waiters);
   pthread_mutex_init(&timeline->mutex, NULL);
}
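
/* Timeline bookkeeping notes (descriptive, derived from the helpers below):
 *  - `points` is kept sorted by ascending value; each point owns a syncobj
 *    that gets signaled when the submission for that value completes.
 *  - `free_points` recycles retired points and their syncobjs, so steady
 *    state signaling does not have to allocate.
 *  - highest_signaled trails highest_submitted and is advanced by
 *    radv_timeline_gc_locked() as point syncobjs are observed signaled.
 */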

static void
radv_destroy_timeline(struct radv_device *device,
                      struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point,
                            &timeline->free_points, list) {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   list_for_each_entry_safe(struct radv_timeline_point, point,
                            &timeline->points, list) {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   pthread_mutex_destroy(&timeline->mutex);
}

static void
radv_timeline_gc_locked(struct radv_device *device,
                        struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point,
                            &timeline->points, list) {
      if (point->wait_count || point->value > timeline->highest_submitted)
         return;

      if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
         timeline->highest_signaled = point->value;
         list_del(&point->list);
         list_add(&point->list, &timeline->free_points);
      }
   }
}

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device,
                                         struct radv_timeline *timeline,
                                         uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point,
                       &timeline->points, list) {
      if (point->value >= p) {
         ++point->wait_count;
         return point;
      }
   }
   return NULL;
}

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device,
                               struct radv_timeline *timeline,
                               uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   struct radv_timeline_point *ret = NULL;
   struct radv_timeline_point *prev = NULL;
   int r;

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point,
                       &timeline->points, list) {
      if (point->value == p) {
         return NULL;
      }

      if (point->value < p)
         prev = point;
   }

   if (list_is_empty(&timeline->free_points)) {
      ret = malloc(sizeof(struct radv_timeline_point));
      if (!ret)
         return NULL; /* Allocation can fail; bail out instead of dereferencing NULL. */

      r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
      if (r) {
         free(ret);
         return NULL;
      }
   } else {
      ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
      list_del(&ret->list);

      device->ws->reset_syncobj(device->ws, ret->syncobj);
   }

   ret->value = p;
   ret->wait_count = 1;

   if (prev) {
      list_add(&ret->list, &prev->list);
   } else {
      list_addtail(&ret->list, &timeline->points);
   }
   return ret;
}
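
/* Example of the ordering maintained above (illustrative): with points for
 * values {3, 5} already on the list, adding value 4 leaves prev pointing at
 * the point for 3, and list_add() places the new point directly after it,
 * keeping the list sorted as {3, 4, 5}. That is what lets
 * radv_timeline_find_point_at_least_locked() return the first match it sees.
 */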


static VkResult
radv_timeline_wait(struct radv_device *device,
                   struct radv_timeline *timeline,
                   uint64_t value,
                   uint64_t abs_timeout)
{
   pthread_mutex_lock(&timeline->mutex);

   while (timeline->highest_submitted < value) {
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout);

      pthread_cond_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);

      if (radv_get_current_time() >= abs_timeout &&
          timeline->highest_submitted < value) {
         pthread_mutex_unlock(&timeline->mutex);
         return VK_TIMEOUT;
      }
   }

   struct radv_timeline_point *point =
      radv_timeline_find_point_at_least_locked(device, timeline, value);
   pthread_mutex_unlock(&timeline->mutex);
   if (!point)
      return VK_SUCCESS;

   bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);

   pthread_mutex_lock(&timeline->mutex);
   point->wait_count--;
   pthread_mutex_unlock(&timeline->mutex);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list)
{
   list_for_each_entry_safe(struct radv_timeline_waiter, waiter,
                            &timeline->waiters, list) {
      if (waiter->value > timeline->highest_submitted)
         continue;

      radv_queue_trigger_submission(waiter->submission, 1, processing_list);
      list_del(&waiter->list);
   }
}

static
void radv_destroy_semaphore_part(struct radv_device *device,
                                 struct radv_semaphore_part *part)
{
   switch (part->kind) {
   case RADV_SEMAPHORE_NONE:
      break;
   case RADV_SEMAPHORE_WINSYS:
      device->ws->destroy_sem(part->ws_sem);
      break;
   case RADV_SEMAPHORE_TIMELINE:
      radv_destroy_timeline(device, &part->timeline);
      break;
   case RADV_SEMAPHORE_SYNCOBJ:
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
      device->ws->destroy_syncobj(device->ws, part->syncobj);
      break;
   }
   part->kind = RADV_SEMAPHORE_NONE;
}

static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
{
   const VkSemaphoreTypeCreateInfo *type_info =
      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);

   if (!type_info)
      return VK_SEMAPHORE_TYPE_BINARY;

   if (initial_value)
      *initial_value = type_info->initialValue;
   return type_info->semaphoreType;
}
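
/* For reference, the pNext chain this helper parses is produced by
 * application code along these lines (illustrative sketch only):
 *
 *    VkSemaphoreTypeCreateInfo type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
 *       .initialValue = 0,
 *    };
 *    VkSemaphoreCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 *    vkCreateSemaphore(device, &info, NULL, &sem);
 *
 * Without the chained struct the semaphore defaults to binary.
 */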

static void
radv_destroy_semaphore(struct radv_device *device,
                       const VkAllocationCallbacks *pAllocator,
                       struct radv_semaphore *sem)
{
   radv_destroy_semaphore_part(device, &sem->temporary);
   radv_destroy_semaphore_part(device, &sem->permanent);
   vk_object_base_finish(&sem->base);
   vk_free2(&device->vk.alloc, pAllocator, sem);
}

VkResult radv_CreateSemaphore(
   VkDevice                                    _device,
   const VkSemaphoreCreateInfo*                pCreateInfo,
   const VkAllocationCallbacks*                pAllocator,
   VkSemaphore*                                pSemaphore)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkExportSemaphoreCreateInfo *export =
      vk_find_struct_const(pCreateInfo->pNext, EXPORT_SEMAPHORE_CREATE_INFO);
   VkExternalSemaphoreHandleTypeFlags handleTypes =
      export ? export->handleTypes : 0;
   uint64_t initial_value = 0;
   VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);

   struct radv_semaphore *sem = vk_alloc2(&device->vk.alloc, pAllocator,
                                          sizeof(*sem), 8,
                                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sem)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sem->base,
                       VK_OBJECT_TYPE_SEMAPHORE);

   sem->temporary.kind = RADV_SEMAPHORE_NONE;
   sem->permanent.kind = RADV_SEMAPHORE_NONE;

   if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
       device->physical_device->rad_info.has_timeline_syncobj) {
      int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
      if (ret) {
         radv_destroy_semaphore(device, pAllocator, sem);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
      sem->permanent.timeline_syncobj.max_point = initial_value;
      sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
   } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
      radv_create_timeline(&sem->permanent.timeline, initial_value);
      sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
   } else if (device->always_use_syncobj || handleTypes) {
      assert(device->physical_device->rad_info.has_syncobj);
      int ret = device->ws->create_syncobj(device->ws, false,
                                           &sem->permanent.syncobj);
      if (ret) {
         radv_destroy_semaphore(device, pAllocator, sem);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
   } else {
      sem->permanent.ws_sem = device->ws->create_sem(device->ws);
      if (!sem->permanent.ws_sem) {
         radv_destroy_semaphore(device, pAllocator, sem);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      sem->permanent.kind = RADV_SEMAPHORE_WINSYS;
   }

   *pSemaphore = radv_semaphore_to_handle(sem);
   return VK_SUCCESS;
}

void radv_DestroySemaphore(
   VkDevice                                    _device,
   VkSemaphore                                 _semaphore,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
   if (!_semaphore)
      return;

   radv_destroy_semaphore(device, pAllocator, sem);
}

VkResult
radv_GetSemaphoreCounterValue(VkDevice _device,
                              VkSemaphore _semaphore,
                              uint64_t *pValue)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   struct radv_semaphore_part *part =
      semaphore->temporary.kind != RADV_SEMAPHORE_NONE ?
      &semaphore->temporary : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      pthread_mutex_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      *pValue = part->timeline.highest_signaled;
      pthread_mutex_unlock(&part->timeline.mutex);
      return VK_SUCCESS;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
   case RADV_SEMAPHORE_WINSYS:
      unreachable("Invalid semaphore type");
   }
   unreachable("Unhandled semaphore type");
}


static VkResult
radv_wait_timelines(struct radv_device *device,
                    const VkSemaphoreWaitInfo *pWaitInfo,
                    uint64_t abs_timeout)
{
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
      for (;;) {
         for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
                                                 pWaitInfo->pValues[i], 0);

            if (result == VK_SUCCESS)
               return VK_SUCCESS;
         }
         if (radv_get_current_time() > abs_timeout)
            return VK_TIMEOUT;
      }
   }

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
                                           pWaitInfo->pValues[i], abs_timeout);

      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

VkResult
radv_WaitSemaphores(VkDevice _device,
                    const VkSemaphoreWaitInfo *pWaitInfo,
                    uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   uint64_t abs_timeout = radv_get_absolute_timeout(timeout);

   if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind == RADV_SEMAPHORE_TIMELINE)
      return radv_wait_timelines(device, pWaitInfo, abs_timeout);

   if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
      return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "semaphoreCount integer overflow");

   bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
   uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
   if (!handles)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      handles[i] = semaphore->permanent.syncobj;
   }

   bool success = device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
                                                    pWaitInfo->semaphoreCount, wait_all, false,
                                                    abs_timeout);
   free(handles);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}
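
/* Illustrative application-side wait-any call handled above (sketch;
 * sem[] and val[] are placeholder arrays, not names from this driver):
 *
 *    VkSemaphoreWaitInfo wait_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_WAIT_INFO,
 *       .flags = VK_SEMAPHORE_WAIT_ANY_BIT_KHR,
 *       .semaphoreCount = 2,
 *       .pSemaphores = sem,
 *       .pValues = val,
 *    };
 *    vkWaitSemaphores(device, &wait_info, UINT64_MAX);
 */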

VkResult
radv_SignalSemaphore(VkDevice _device,
                     const VkSemaphoreSignalInfo *pSignalInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);

   struct radv_semaphore_part *part =
      semaphore->temporary.kind != RADV_SEMAPHORE_NONE ?
      &semaphore->temporary : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      pthread_mutex_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
      part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);

      struct list_head processing_list;
      list_inithead(&processing_list);
      radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
      pthread_mutex_unlock(&part->timeline.mutex);

      VkResult result = radv_process_submissions(&processing_list);

      /* This needs to happen after radv_process_submissions, so
       * that any submitted submissions that are now unblocked get
       * processed before we wake the application. This way we
       * ensure that any binary semaphores that are now unblocked
       * are usable by the application. */
      pthread_cond_broadcast(&device->timeline_cond);

      return result;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
      device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
      break;
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
   case RADV_SEMAPHORE_WINSYS:
      unreachable("Invalid semaphore type");
   }
   return VK_SUCCESS;
}

static void radv_destroy_event(struct radv_device *device,
                               const VkAllocationCallbacks *pAllocator,
                               struct radv_event *event)
{
   if (event->bo)
      device->ws->buffer_destroy(event->bo);

   vk_object_base_finish(&event->base);
   vk_free2(&device->vk.alloc, pAllocator, event);
}

VkResult radv_CreateEvent(
   VkDevice                                    _device,
   const VkEventCreateInfo*                    pCreateInfo,
   const VkAllocationCallbacks*                pAllocator,
   VkEvent*                                    pEvent)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator,
                                        sizeof(*event), 8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (!event)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);

   event->bo = device->ws->buffer_create(device->ws, 8, 8,
                                         RADEON_DOMAIN_GTT,
                                         RADEON_FLAG_VA_UNCACHED |
                                         RADEON_FLAG_CPU_ACCESS |
                                         RADEON_FLAG_NO_INTERPROCESS_SHARING,
                                         RADV_BO_PRIORITY_FENCE);
   if (!event->bo) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   event->map = (uint64_t*)device->ws->buffer_map(event->bo);
   if (!event->map) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pEvent = radv_event_to_handle(event);

   return VK_SUCCESS;
}

void radv_DestroyEvent(
   VkDevice                                    _device,
   VkEvent                                     _event,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (!event)
      return;

   radv_destroy_event(device, pAllocator, event);
}

VkResult radv_GetEventStatus(
   VkDevice                                    _device,
   VkEvent                                     _event)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (*event->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VkResult radv_SetEvent(
   VkDevice                                    _device,
   VkEvent                                     _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 1;

   return VK_SUCCESS;
}

VkResult radv_ResetEvent(
   VkDevice                                    _device,
   VkEvent                                     _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 0;

   return VK_SUCCESS;
}
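
/* Events are backed by a single 8-byte, CPU-visible, uncached GTT buffer
 * (see radv_CreateEvent): the host toggles 0/1 directly through event->map
 * above, and the GPU-side vkCmdSetEvent/vkCmdResetEvent paths write the
 * same location from the command stream. Descriptive note, not spec text.
 */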

static void
radv_destroy_buffer(struct radv_device *device,
                    const VkAllocationCallbacks *pAllocator,
                    struct radv_buffer *buffer)
{
   if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
      device->ws->buffer_destroy(buffer->bo);

   vk_object_base_finish(&buffer->base);
   vk_free2(&device->vk.alloc, pAllocator, buffer);
}

VkResult radv_CreateBuffer(
   VkDevice                                    _device,
   const VkBufferCreateInfo*                   pCreateInfo,
   const VkAllocationCallbacks*                pAllocator,
   VkBuffer*                                   pBuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer *buffer;

   if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (buffer == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);

   buffer->size = pCreateInfo->size;
   buffer->usage = pCreateInfo->usage;
   buffer->bo = NULL;
   buffer->offset = 0;
   buffer->flags = pCreateInfo->flags;

   buffer->shareable = vk_find_struct_const(pCreateInfo->pNext,
                                            EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;

   if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
      buffer->bo = device->ws->buffer_create(device->ws,
                                             align64(buffer->size, 4096),
                                             4096, 0, RADEON_FLAG_VIRTUAL,
                                             RADV_BO_PRIORITY_VIRTUAL);
      if (!buffer->bo) {
         radv_destroy_buffer(device, pAllocator, buffer);
         return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
      }
   }

   *pBuffer = radv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void radv_DestroyBuffer(
   VkDevice                                    _device,
   VkBuffer                                    _buffer,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   radv_destroy_buffer(device, pAllocator, buffer);
}

VkDeviceAddress radv_GetBufferDeviceAddress(
   VkDevice                                    device,
   const VkBufferDeviceAddressInfo*            pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return radv_buffer_get_va(buffer->bo) + buffer->offset;
}
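
/* The device address is simply the BO's GPU virtual address plus the offset
 * at which the buffer was bound; no per-call translation is involved
 * (descriptive note).
 */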


uint64_t radv_GetBufferOpaqueCaptureAddress(VkDevice device,
                                            const VkBufferDeviceAddressInfo *pInfo)
{
   return 0;
}

uint64_t radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
                                                  const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
   return 0;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static uint32_t radv_surface_max_layer_count(struct radv_image_view *iview)
{
   return iview->type == VK_IMAGE_VIEW_TYPE_3D ?
      iview->extent.depth : (iview->base_layer + iview->layer_count);
}

static uint32_t
radv_init_dcc_control_reg(struct radv_device *device,
                          struct radv_image_view *iview)
{
   unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
   unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
   unsigned max_compressed_block_size;
   unsigned independent_128b_blocks;
   unsigned independent_64b_blocks;

   if (!radv_dcc_enabled(iview->image, iview->base_mip))
      return 0;

   if (!device->physical_device->rad_info.has_dedicated_vram) {
      /* amdvlk: [min-compressed-block-size] should be set to 32 for
       * dGPU and 64 for APU because all of our APUs to date use
       * DIMMs which have a request granularity size of 64B while all
       * other chips have a 32B request size.
       */
      min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
   }

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
      independent_64b_blocks = 0;
      independent_128b_blocks = 1;
   } else {
      independent_128b_blocks = 0;

      if (iview->image->info.samples > 1) {
         if (iview->image->planes[0].surface.bpe == 1)
            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
         else if (iview->image->planes[0].surface.bpe == 2)
            max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
      }

      if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT |
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
         /* If this DCC image is potentially going to be used in texture
          * fetches, we need some special settings.
          */
         independent_64b_blocks = 1;
         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else {
         /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
          * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
          * big as possible for better compression state.
          */
         independent_64b_blocks = 0;
         max_compressed_block_size = max_uncompressed_block_size;
      }
   }

   return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
          S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
          S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
          S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
          S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}
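
/* Worked example (derived from the branches above, illustrative): a GFX10
 * chip with dedicated VRAM ends up with MAX_UNCOMPRESSED_BLOCK_SIZE = 256B,
 * MAX_COMPRESSED_BLOCK_SIZE = 128B, MIN_COMPRESSED_BLOCK_SIZE = 32B,
 * INDEPENDENT_64B_BLOCKS = 0 and INDEPENDENT_128B_BLOCKS = 1, which is the
 * CB_DCC_CONTROL packing for the common single-sample render-target case.
 */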

void
radv_initialise_color_surface(struct radv_device *device,
                              struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
   const struct vk_format_description *desc;
   unsigned ntype, format, swap, endian;
   unsigned blend_clamp = 0, blend_bypass = 0;
   uint64_t va;
   const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
   const struct radeon_surf *surf = &plane->surface;

   desc = vk_format_description(iview->vk_format);

   memset(cb, 0, sizeof(*cb));

   /* Intensity is implemented as Red, so treat it that way. */
   cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1);

   va = radv_buffer_get_va(iview->bo) + iview->image->offset + plane->offset;

   cb->cb_color_base = va >> 8;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
                                 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
                                 S_028EE0_CMASK_PIPE_ALIGNED(1) |
                                 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.dcc.pipe_aligned);
      } else {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (surf->dcc_offset)
            meta = surf->u.gfx9.dcc;

         cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
                                S_028C74_FMASK_SW_MODE(surf->u.gfx9.fmask.swizzle_mode) |
                                S_028C74_RB_ALIGNED(meta.rb_aligned) |
                                S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
         cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.surf.epitch);
      }

      cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
      cb->cb_color_base |= surf->tile_swizzle;
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
      unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

      cb->cb_color_base += level_info->offset >> 8;
      if (level_info->mode == RADEON_SURF_MODE_2D)
         cb->cb_color_base |= surf->tile_swizzle;

      pitch_tile_max = level_info->nblk_x / 8 - 1;
      slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
      tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

      cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
      cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
      cb->cb_color_cmask_slice = surf->u.legacy.cmask_slice_tile_max;

      cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

      if (radv_image_has_fmask(iview->image)) {
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.fmask.pitch_in_pixels / 8 - 1);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.fmask.tiling_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.fmask.slice_tile_max);
      } else {
         /* This must be set for fast clear to work without FMASK. */
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
      }
   }

   /* CMASK variables */
   va = radv_buffer_get_va(iview->bo) + iview->image->offset;
   va += surf->cmask_offset;
   cb->cb_color_cmask = va >> 8;

   va = radv_buffer_get_va(iview->bo) + iview->image->offset;
   va += surf->dcc_offset;

   if (radv_dcc_enabled(iview->image, iview->base_mip) &&
       device->physical_device->rad_info.chip_class <= GFX8)
      va += plane->surface.u.legacy.level[iview->base_mip].dcc_offset;

   unsigned dcc_tile_swizzle = surf->tile_swizzle;
   dcc_tile_swizzle &= (surf->dcc_alignment - 1) >> 8;

   cb->cb_dcc_base = va >> 8;
   cb->cb_dcc_base |= dcc_tile_swizzle;

   /* GFX10 field has the same base shift as the GFX6 field. */
   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
                       S_028C6C_SLICE_MAX_GFX10(max_slice);

   if (iview->image->info.samples > 1) {
      unsigned log_samples = util_logbase2(iview->image->info.samples);

      cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
                             S_028C74_NUM_FRAGMENTS(log_samples);
   }

   if (radv_image_has_fmask(iview->image)) {
      va = radv_buffer_get_va(iview->bo) + iview->image->offset + surf->fmask_offset;
      cb->cb_color_fmask = va >> 8;
      cb->cb_color_fmask |= surf->fmask_tile_swizzle;
   } else {
      cb->cb_color_fmask = cb->cb_color_base;
   }

   ntype = radv_translate_color_numformat(iview->vk_format,
                                          desc,
                                          vk_format_get_first_non_void_channel(iview->vk_format));
   format = radv_translate_colorformat(iview->vk_format);
   if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
      radv_finishme("Illegal color\n");
   swap = radv_translate_colorswap(iview->vk_format, false);
   endian = radv_colorformat_endian_swap(format);

   /* blend clamp should be set for all NORM/SRGB types */
   if (ntype == V_028C70_NUMBER_UNORM ||
       ntype == V_028C70_NUMBER_SNORM ||
       ntype == V_028C70_NUMBER_SRGB)
      blend_clamp = 1;

   /* set blend bypass according to docs if SINT/UINT or
    * 8/24 COLOR variants */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
       format == V_028C70_COLOR_X24_8_32_FLOAT) {
      blend_clamp = 0;
      blend_bypass = 1;
   }
#if 0
   if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
       (format == V_028C70_COLOR_8 ||
        format == V_028C70_COLOR_8_8 ||
        format == V_028C70_COLOR_8_8_8_8))
      ->color_is_int8 = true;
#endif
   cb->cb_color_info = S_028C70_FORMAT(format) |
                       S_028C70_COMP_SWAP(swap) |
                       S_028C70_BLEND_CLAMP(blend_clamp) |
                       S_028C70_BLEND_BYPASS(blend_bypass) |
                       S_028C70_SIMPLE_FLOAT(1) |
                       S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
                                           ntype != V_028C70_NUMBER_SNORM &&
                                           ntype != V_028C70_NUMBER_SRGB &&
                                           format != V_028C70_COLOR_8_24 &&
                                           format != V_028C70_COLOR_24_8) |
                       S_028C70_NUMBER_TYPE(ntype) |
                       S_028C70_ENDIAN(endian);
   if (radv_image_has_fmask(iview->image)) {
      cb->cb_color_info |= S_028C70_COMPRESSION(1);
      if (device->physical_device->rad_info.chip_class == GFX6) {
         unsigned fmask_bankh = util_logbase2(surf->u.legacy.fmask.bankh);
         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
      }

      if (radv_image_is_tc_compat_cmask(iview->image)) {
         /* Allow the texture block to read FMASK directly
          * without decompressing it. This bit must be cleared
          * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
          * otherwise the operation doesn't happen.
          */
         cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

         /* Set CMASK into a tiling format that allows the
          * texture block to read it.
          */
         cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
      }
   }

   if (radv_image_has_cmask(iview->image) &&
       !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
      cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

   if (radv_dcc_enabled(iview->image, iview->base_mip))
      cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

   cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

   /* This must be set for fast clear to work without FMASK. */
   if (!radv_image_has_fmask(iview->image) &&
       device->physical_device->rad_info.chip_class == GFX6) {
      unsigned bankh = util_logbase2(surf->u.legacy.bankh);
      cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   }

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      const struct vk_format_description *format_desc =
         vk_format_description(iview->image->vk_format);

      unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D ?
         (iview->extent.depth - 1) : (iview->image->info.array_size - 1);
      unsigned width = iview->extent.width / (iview->plane_id ? format_desc->width_divisor : 1);
      unsigned height = iview->extent.height / (iview->plane_id ? format_desc->height_divisor : 1);

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);

         cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
                                 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
                                 S_028EE0_RESOURCE_LEVEL(1);
      } else {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
         cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) |
                                S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
      }

      cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) |
                             S_028C68_MIP0_HEIGHT(height - 1) |
                             S_028C68_MAX_MIP(iview->image->info.levels - 1);
   }
}

static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device,
                                 struct radv_image_view *iview)
{
   unsigned max_zplanes = 0;

   assert(radv_image_is_tc_compat_htile(iview->image));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Default value for 32-bit depth surfaces. */
      max_zplanes = 4;

      if (iview->vk_format == VK_FORMAT_D16_UNORM &&
          iview->image->info.samples > 1)
         max_zplanes = 2;

      max_zplanes = max_zplanes + 1;
   } else {
      if (iview->vk_format == VK_FORMAT_D16_UNORM) {
         /* Do not enable Z plane compression for 16-bit depth
          * surfaces because it isn't supported on GFX8. Only
          * 32-bit depth surfaces are supported by the hardware.
          * This allows to maintain shader compatibility and to
          * reduce the number of depth decompressions.
          */
         max_zplanes = 1;
      } else {
         if (iview->image->info.samples <= 1)
            max_zplanes = 5;
         else if (iview->image->info.samples <= 4)
            max_zplanes = 3;
         else
            max_zplanes = 2;
      }
   }

   return max_zplanes;
}
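
/* Resulting DECOMPRESS_ON_N_ZPLANES values (derived from the code above):
 *
 *                GFX9+              GFX8 and older
 *   D16          5 (3 if MSAA)      1
 *   other (D32)  5                  5 (1x), 3 (2x/4x), 2 (8x)
 */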

void
radv_initialise_ds_surface(struct radv_device *device,
                           struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
   unsigned level = iview->base_mip;
   unsigned format, stencil_format;
   uint64_t va, s_offs, z_offs;
   bool stencil_only = false;
   const struct radv_image_plane *plane = &iview->image->planes[0];
   const struct radeon_surf *surf = &plane->surface;

   assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

   memset(ds, 0, sizeof(*ds));
   switch (iview->image->vk_format) {
   case VK_FORMAT_D24_UNORM_S8_UINT:
   case VK_FORMAT_X8_D24_UNORM_PACK32:
      ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
      ds->offset_scale = 2.0f;
      break;
   case VK_FORMAT_D16_UNORM:
   case VK_FORMAT_D16_UNORM_S8_UINT:
      ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
      ds->offset_scale = 4.0f;
      break;
   case VK_FORMAT_D32_SFLOAT:
   case VK_FORMAT_D32_SFLOAT_S8_UINT:
      ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
                                          S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
      ds->offset_scale = 1.0f;
      break;
   case VK_FORMAT_S8_UINT:
      stencil_only = true;
      break;
   default:
      break;
   }

   format = radv_translate_dbformat(iview->image->vk_format);
   stencil_format = surf->has_stencil ?
      V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
                       S_028008_SLICE_MAX(max_slice);
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      ds->db_depth_view |= S_028008_SLICE_START_HI(iview->base_layer >> 11) |
                           S_028008_SLICE_MAX_HI(max_slice >> 11);
   }

   ds->db_htile_data_base = 0;
   ds->db_htile_surface = 0;

   va = radv_buffer_get_va(iview->bo) + iview->image->offset;
   s_offs = z_offs = va;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      assert(surf->u.gfx9.surf_offset == 0);
      s_offs += surf->u.gfx9.stencil_offset;

      ds->db_z_info = S_028038_FORMAT(format) |
                      S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
                      S_028038_SW_MODE(surf->u.gfx9.surf.swizzle_mode) |
                      S_028038_MAXMIP(iview->image->info.levels - 1) |
                      S_028038_ZRANGE_PRECISION(1);
      ds->db_stencil_info = S_02803C_FORMAT(stencil_format) |
                            S_02803C_SW_MODE(surf->u.gfx9.stencil.swizzle_mode);

      if (device->physical_device->rad_info.chip_class == GFX9) {
         ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.surf.epitch);
         ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.stencil.epitch);
      }

      ds->db_depth_view |= S_028008_MIPID(level);
      ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                          S_02801C_Y_MAX(iview->image->info.height - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes =
               radv_calc_decompress_on_z_planes(device, iview);

            ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

            if (device->physical_device->rad_info.chip_class >= GFX10) {
               ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
            } else {
               ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
            }
         }

         if (!surf->has_stencil)
            /* Use all of the htile_buffer for depth if there's no stencil. */
            ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
              surf->htile_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) |
                                S_028ABC_PIPE_ALIGNED(1);

         if (device->physical_device->rad_info.chip_class == GFX9) {
            ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
         }
      }
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

      if (stencil_only)
         level_info = &surf->u.legacy.stencil_level[level];

      z_offs += surf->u.legacy.level[level].offset;
      s_offs += surf->u.legacy.stencil_level[level].offset;

      ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
      ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
      ds->db_stencil_info = S_028044_FORMAT(stencil_format);

      if (iview->image->info.samples > 1)
         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

      if (device->physical_device->rad_info.chip_class >= GFX7) {
         struct radeon_info *info = &device->physical_device->rad_info;
         unsigned tiling_index = surf->u.legacy.tiling_index[level];
         unsigned stencil_index = surf->u.legacy.stencil_tiling_index[level];
         unsigned macro_index = surf->u.legacy.macro_tile_index;
         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

         if (stencil_only)
            tile_mode = stencil_tile_mode;

         ds->db_depth_info |=
            S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
            S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
            S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
            S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
            S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
            S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
      } else {
         unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
         tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
         if (stencil_only)
            ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
      }

      ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
                          S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
      ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

         if (!surf->has_stencil &&
             !radv_image_is_tc_compat_htile(iview->image))
            /* Use all of the htile_buffer for depth if there's no stencil. */
            ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

         va = radv_buffer_get_va(iview->bo) + iview->image->offset +
              surf->htile_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes =
               radv_calc_decompress_on_z_planes(device, iview);

            ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
            ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
         }
      }
   }

   ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
   ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

VkResult radv_CreateFramebuffer(
   VkDevice                                    _device,
   const VkFramebufferCreateInfo*              pCreateInfo,
   const VkAllocationCallbacks*                pAllocator,
   VkFramebuffer*                              pFramebuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_framebuffer *framebuffer;
   const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
      vk_find_struct_const(pCreateInfo->pNext,
                           FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer);
   if (!imageless_create_info)
      size += sizeof(struct radv_image_view*) * pCreateInfo->attachmentCount;
   framebuffer = vk_alloc2(&device->vk.alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (framebuffer == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;
   if (imageless_create_info) {
      for (unsigned i = 0; i < imageless_create_info->attachmentImageInfoCount; ++i) {
         const VkFramebufferAttachmentImageInfo *attachment =
            imageless_create_info->pAttachmentImageInfos + i;
         framebuffer->width = MIN2(framebuffer->width, attachment->width);
         framebuffer->height = MIN2(framebuffer->height, attachment->height);
         framebuffer->layers = MIN2(framebuffer->layers, attachment->layerCount);
      }
   } else {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct radv_image_view *iview = radv_image_view_from_handle(_iview);
         framebuffer->attachments[i] = iview;
         framebuffer->width = MIN2(framebuffer->width, iview->extent.width);
         framebuffer->height = MIN2(framebuffer->height, iview->extent.height);
         framebuffer->layers = MIN2(framebuffer->layers, radv_surface_max_layer_count(iview));
      }
   }

   *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void radv_DestroyFramebuffer(
   VkDevice                                    _device,
   VkFramebuffer                               _fb,
   const VkAllocationCallbacks*                pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

   if (!fb)
      return;
   vk_object_base_finish(&fb->base);
   vk_free2(&device->vk.alloc, pAllocator, fb);
}

static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
   switch (address_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return V_008F30_SQ_TEX_WRAP;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return V_008F30_SQ_TEX_MIRROR;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return V_008F30_SQ_TEX_CLAMP_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   default:
      unreachable("illegal tex wrap mode");
      break;
   }
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
   switch (op) {
   case VK_COMPARE_OP_NEVER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   case VK_COMPARE_OP_LESS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   case VK_COMPARE_OP_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   case VK_COMPARE_OP_GREATER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   case VK_COMPARE_OP_NOT_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   case VK_COMPARE_OP_ALWAYS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   default:
      unreachable("illegal compare mode");
      break;
   }
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
                              V_008F38_SQ_TEX_XY_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
                              V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
   case VK_FILTER_CUBIC_IMG:
   default:
      fprintf(stderr, "illegal texture filter\n");
      return 0;
   }
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST:
      return V_008F38_SQ_TEX_Z_FILTER_POINT;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR:
      return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   default:
      return V_008F38_SQ_TEX_Z_FILTER_NONE;
   }
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
   switch (bcolor) {
   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
      return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
   default:
      break;
   }
   return 0;
}

static unsigned
radv_tex_aniso_filter(unsigned filter)
{
   if (filter < 2)
      return 0;
   if (filter < 4)
      return 1;
   if (filter < 8)
      return 2;
   if (filter < 16)
      return 3;
   return 4;
}
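
/* This is floor(log2(filter)) clamped to [0, 4]: 1x -> 0, 2x-3x -> 1,
 * 4x-7x -> 2, 8x-15x -> 3, 16x and up -> 4 (descriptive note on the
 * mapping implemented above).
 */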

static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
   switch (mode) {
   case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MIN;
   case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MAX;
   default:
      break;
   }
   return 0;
}

static uint32_t
radv_get_max_anisotropy(struct radv_device *device,
                        const VkSamplerCreateInfo *pCreateInfo)
{
   if (device->force_aniso >= 0)
      return device->force_aniso;

   if (pCreateInfo->anisotropyEnable &&
       pCreateInfo->maxAnisotropy > 1.0f)
      return (uint32_t)pCreateInfo->maxAnisotropy;

   return 0;
}
7370
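/* Convert a float to signed fixed point with 'frac_bits' fractional bits,
 * e.g. S_FIXED(1.5f, 8) == 384 (1.5 * 256).
 */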
static inline int S_FIXED(float value, unsigned frac_bits)
{
	return value * (1 << frac_bits);
}

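/* Grab a free slot in the device-global border color buffer and upload the
 * color there. Returns RADV_BORDER_COLOR_COUNT when every slot is in use.
 */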
static uint32_t radv_register_border_color(struct radv_device *device,
					   VkClearColorValue value)
{
	uint32_t slot;

	pthread_mutex_lock(&device->border_color_data.mutex);

	for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
		if (!device->border_color_data.used[slot]) {
			/* Copy to the GPU, accounting for endianness. */
			util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot],
						&value,
						sizeof(VkClearColorValue));

			device->border_color_data.used[slot] = true;
			break;
		}
	}

	pthread_mutex_unlock(&device->border_color_data.mutex);

	return slot;
}

static void radv_unregister_border_color(struct radv_device *device,
					 uint32_t slot)
{
	pthread_mutex_lock(&device->border_color_data.mutex);

	device->border_color_data.used[slot] = false;

	pthread_mutex_unlock(&device->border_color_data.mutex);
}

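/* Build the four-dword sampler descriptor. TRUNC_COORD truncates instead of
 * rounding when converting coordinates, which presumably is why it is only
 * enabled when both the min and mag filters are nearest.
 */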
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
			   device->physical_device->rad_info.chip_class == GFX9;
	unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
	unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	bool trunc_coord = pCreateInfo->minFilter == VK_FILTER_NEAREST &&
			   pCreateInfo->magFilter == VK_FILTER_NEAREST;
	bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
				 pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
				 pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
	VkBorderColor border_color = uses_border_color ? pCreateInfo->borderColor :
							 VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
	uint32_t border_color_ptr;

	const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_REDUCTION_MODE_CREATE_INFO);
	if (sampler_reduction)
		filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

	if (pCreateInfo->compareEnable)
		depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);

	sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;

	if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
	    border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
		const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
			vk_find_struct_const(pCreateInfo->pNext,
					     SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

		assert(custom_border_color);

		sampler->border_color_slot =
			radv_register_border_color(device, custom_border_color->customBorderColor);

		/* Did we fail to find a slot? */
		if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
			fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
			border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
		}
	}

	/* If we don't have a custom color, set the ptr to 0 */
	border_color_ptr = sampler->border_color_slot != RADV_BORDER_COLOR_COUNT
		? sampler->border_color_slot
		: 0;

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(compat_mode) |
			     S_008F30_FILTER_MODE(filter_mode) |
			     S_008F30_TRUNC_COORD(trunc_coord));
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));

	if (device->physical_device->rad_info.chip_class >= GFX10) {
		sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
	} else {
		sampler->state[2] |=
			S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
			S_008F38_FILTER_PREC_FIX(1) |
			S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
	}
}

VkResult radv_CreateSampler(
	VkDevice _device,
	const VkSamplerCreateInfo* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkSampler* pSampler)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_sampler *sampler;

	const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
		vk_find_struct_const(pCreateInfo->pNext,
				     SAMPLER_YCBCR_CONVERSION_INFO);

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

	sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sampler)
		return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

	vk_object_base_init(&device->vk, &sampler->base,
			    VK_OBJECT_TYPE_SAMPLER);

	radv_init_sampler(device, sampler, pCreateInfo);

	sampler->ycbcr_sampler = ycbcr_conversion ?
		radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
	*pSampler = radv_sampler_to_handle(sampler);

	return VK_SUCCESS;
}

void radv_DestroySampler(
	VkDevice _device,
	VkSampler _sampler,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (!sampler)
		return;

	if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
		radv_unregister_border_color(device, sampler->border_color_slot);

	vk_object_base_finish(&sampler->base);
	vk_free2(&device->vk.alloc, pAllocator, sampler);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 * - Loader interface v0 is incompatible with later versions. We don't
	 *   support it.
	 *
	 * - In loader interface v1:
	 *     - The first ICD entrypoint called by the loader is
	 *       vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 *       entrypoint.
	 *     - The ICD must statically expose no other Vulkan symbol unless it is
	 *       linked with -Bsymbolic.
	 *     - Each dispatchable Vulkan handle created by the ICD must be
	 *       a pointer to a struct whose first member is VK_LOADER_DATA. The
	 *       ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 *     - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 *       vkDestroySurfaceKHR(). The ICD must be capable of working with
	 *       such loader-managed surfaces.
	 *
	 * - Loader interface v2 differs from v1 in:
	 *     - The first ICD entrypoint called by the loader is
	 *       vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 *       statically expose this entrypoint.
	 *
	 * - Loader interface v3 differs from v2 in:
	 *     - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *       vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
	 *       because the loader no longer does so.
	 *
	 * - Loader interface v4 differs from v3 in:
	 *     - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
	 */
	*pSupportedVersion = MIN2(*pSupportedVersion, 4u);
	return VK_SUCCESS;
}

VkResult radv_GetMemoryFdKHR(VkDevice _device,
			     const VkMemoryGetFdInfoKHR *pGetFdInfo,
			     int *pFD)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

	assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

	/* At the moment, we support only the below handle types. */
	assert(pGetFdInfo->handleType ==
	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
	       pGetFdInfo->handleType ==
	       VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

	bool ret = radv_get_memory_fd(device, memory, pFD);
	if (!ret)
		return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
	return VK_SUCCESS;
}

static uint32_t radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
							enum radeon_bo_domain domains,
							enum radeon_bo_flag flags,
							enum radeon_bo_flag ignore_flags)
{
	/* Don't count GTT/CPU as relevant:
	 *
	 * - We're not fully consistent between the two.
	 * - Sometimes VRAM gets VRAM|GTT.
	 */
	const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM |
						       RADEON_DOMAIN_GDS |
						       RADEON_DOMAIN_OA;
	uint32_t bits = 0;
	for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
		if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
			continue;

		if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
			continue;

		bits |= 1u << i;
	}

	return bits;
}

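/* Start out requiring only the CPU-access and write-combine flags to match
 * (every other flag is ignored); if no memory type qualifies, retry while
 * ignoring RADEON_FLAG_NO_CPU_ACCESS as well so that some type is reported.
 */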
static uint32_t radv_compute_valid_memory_types(struct radv_physical_device *dev,
						enum radeon_bo_domain domains,
						enum radeon_bo_flag flags)
{
	enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
	uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);

	if (!bits) {
		ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
		bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
	}

	return bits;
}

VkResult radv_GetMemoryFdPropertiesKHR(VkDevice _device,
				       VkExternalMemoryHandleTypeFlagBits handleType,
				       int fd,
				       VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
	RADV_FROM_HANDLE(radv_device, device, _device);

	switch (handleType) {
	case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
		enum radeon_bo_domain domains;
		enum radeon_bo_flag flags;
		if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);

		pMemoryFdProperties->memoryTypeBits =
			radv_compute_valid_memory_types(device->physical_device, domains, flags);
		return VK_SUCCESS;
	}
	default:
		/* The valid usage section for this function says:
		 *
		 *    "handleType must not be one of the handle types defined as
		 *    opaque."
		 *
		 * So opaque handle types fall into the default "unsupported" case.
		 */
		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
	}
}

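/* Importing takes ownership of the FD: on success it is closed, and any
 * syncobj previously stored in *syncobj is destroyed and replaced.
 */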
static VkResult radv_import_opaque_fd(struct radv_device *device,
				      int fd,
				      uint32_t *syncobj)
{
	uint32_t syncobj_handle = 0;
	int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
	if (ret != 0)
		return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);

	if (*syncobj)
		device->ws->destroy_syncobj(device->ws, *syncobj);

	*syncobj = syncobj_handle;
	close(fd);

	return VK_SUCCESS;
}

static VkResult radv_import_sync_fd(struct radv_device *device,
				    int fd,
				    uint32_t *syncobj)
{
	/* If we create a syncobj we do it locally so that if we have an error,
	 * we don't leave a syncobj in an undetermined state in the caller's
	 * fence or semaphore. */
	uint32_t syncobj_handle = *syncobj;
	if (!syncobj_handle) {
		/* A sync FD of -1 denotes an already-signaled payload, so
		 * create the syncobj signaled in that case. */
		bool create_signaled = fd == -1;

		int ret = device->ws->create_syncobj(device->ws, create_signaled,
						     &syncobj_handle);
		if (ret) {
			return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
		}
	} else {
		if (fd == -1)
			device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
	}

	if (fd != -1) {
		int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
		if (ret)
			return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
		close(fd);
	}

	*syncobj = syncobj_handle;

	return VK_SUCCESS;
}

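/* Per the Vulkan spec, VK_SEMAPHORE_IMPORT_TEMPORARY_BIT only replaces the
 * payload until the next wait, so such imports go into sem->temporary;
 * otherwise the permanent payload is replaced.
 */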
VkResult radv_ImportSemaphoreFdKHR(VkDevice _device,
				   const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
	VkResult result;
	struct radv_semaphore_part *dst = NULL;
	bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;

	if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
		assert(!timeline);
		dst = &sem->temporary;
	} else {
		dst = &sem->permanent;
	}

	uint32_t syncobj = (dst->kind == RADV_SEMAPHORE_SYNCOBJ ||
			    dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) ? dst->syncobj : 0;

	switch (pImportSemaphoreFdInfo->handleType) {
	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
		result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
		break;
	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
		assert(!timeline);
		result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
		break;
	default:
		unreachable("Unhandled semaphore handle type");
	}

	if (result == VK_SUCCESS) {
		dst->syncobj = syncobj;
		dst->kind = RADV_SEMAPHORE_SYNCOBJ;
		if (timeline) {
			dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
			dst->timeline_syncobj.max_point = 0;
		}
	}

	return result;
}

VkResult radv_GetSemaphoreFdKHR(VkDevice _device,
				const VkSemaphoreGetFdInfoKHR *pGetFdInfo,
				int *pFd)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
	int ret;
	uint32_t syncobj_handle;

	if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
		assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
		       sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
		syncobj_handle = sem->temporary.syncobj;
	} else {
		assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
		       sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
		syncobj_handle = sem->permanent.syncobj;
	}

	switch (pGetFdInfo->handleType) {
	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
		ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
		if (ret)
			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
		break;
	case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
		ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
		if (ret)
			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);

		/* Exporting a sync FD has transfer semantics: the semaphore
		 * payload is reset, so drop a temporary part entirely or
		 * reset the permanent syncobj. */
		if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
			radv_destroy_semaphore_part(device, &sem->temporary);
		} else {
			device->ws->reset_syncobj(device->ws, syncobj_handle);
		}
		break;
	default:
		unreachable("Unhandled semaphore handle type");
	}

	return VK_SUCCESS;
}

void radv_GetPhysicalDeviceExternalSemaphoreProperties(
	VkPhysicalDevice physicalDevice,
	const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
	VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
	VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

	if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
	    pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
		pExternalSemaphoreProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
		pExternalSemaphoreProperties->compatibleHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
		pExternalSemaphoreProperties->externalSemaphoreFeatures =
			VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
	} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;

	/* Require has_syncobj_wait_for_submit for the syncobj signal ioctl
	 * introduced at virtually the same time. */
	} else if (pdevice->rad_info.has_syncobj_wait_for_submit &&
		   (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
		    pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT)) {
		pExternalSemaphoreProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
		pExternalSemaphoreProperties->compatibleHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
		pExternalSemaphoreProperties->externalSemaphoreFeatures =
			VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
	} else if (pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
		pExternalSemaphoreProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
		pExternalSemaphoreProperties->compatibleHandleTypes =
			VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
		pExternalSemaphoreProperties->externalSemaphoreFeatures =
			VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
			VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
	} else {
		pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
		pExternalSemaphoreProperties->compatibleHandleTypes = 0;
		pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
	}
}

VkResult radv_ImportFenceFdKHR(VkDevice _device,
			       const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
	struct radv_fence_part *dst = NULL;
	VkResult result;

	if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
		dst = &fence->temporary;
	} else {
		dst = &fence->permanent;
	}

	uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;

	switch (pImportFenceFdInfo->handleType) {
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
		result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
		break;
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
		result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
		break;
	default:
		unreachable("Unhandled fence handle type");
	}

	if (result == VK_SUCCESS) {
		dst->syncobj = syncobj;
		dst->kind = RADV_FENCE_SYNCOBJ;
	}

	return result;
}

VkResult radv_GetFenceFdKHR(VkDevice _device,
			    const VkFenceGetFdInfoKHR *pGetFdInfo,
			    int *pFd)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
	int ret;

	struct radv_fence_part *part =
		fence->temporary.kind != RADV_FENCE_NONE ?
		&fence->temporary : &fence->permanent;

	switch (pGetFdInfo->handleType) {
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
		ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
		if (ret)
			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
		break;
	case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
		ret = device->ws->export_syncobj_to_sync_file(device->ws,
							      part->syncobj, pFd);
		if (ret)
			return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);

		if (part == &fence->temporary) {
			radv_destroy_fence_part(device, part);
		} else {
			device->ws->reset_syncobj(device->ws, part->syncobj);
		}
		break;
	default:
		unreachable("Unhandled fence handle type");
	}

	return VK_SUCCESS;
}

void radv_GetPhysicalDeviceExternalFenceProperties(
	VkPhysicalDevice physicalDevice,
	const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
	VkExternalFenceProperties *pExternalFenceProperties)
{
	RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);

	if (pdevice->rad_info.has_syncobj_wait_for_submit &&
	    (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
	     pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT)) {
		pExternalFenceProperties->exportFromImportedHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
		pExternalFenceProperties->compatibleHandleTypes =
			VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT |
			VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
		pExternalFenceProperties->externalFenceFeatures =
			VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT |
			VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
	} else {
		pExternalFenceProperties->exportFromImportedHandleTypes = 0;
		pExternalFenceProperties->compatibleHandleTypes = 0;
		pExternalFenceProperties->externalFenceFeatures = 0;
	}
}

VkResult
radv_CreateDebugReportCallbackEXT(VkInstance _instance,
				  const VkDebugReportCallbackCreateInfoEXT* pCreateInfo,
				  const VkAllocationCallbacks* pAllocator,
				  VkDebugReportCallbackEXT* pCallback)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	return vk_create_debug_report_callback(&instance->debug_report_callbacks,
					       pCreateInfo, pAllocator, &instance->alloc,
					       pCallback);
}

void
radv_DestroyDebugReportCallbackEXT(VkInstance _instance,
				   VkDebugReportCallbackEXT _callback,
				   const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_destroy_debug_report_callback(&instance->debug_report_callbacks,
					 _callback, pAllocator, &instance->alloc);
}

void
radv_DebugReportMessageEXT(VkInstance _instance,
			   VkDebugReportFlagsEXT flags,
			   VkDebugReportObjectTypeEXT objectType,
			   uint64_t object,
			   size_t location,
			   int32_t messageCode,
			   const char* pLayerPrefix,
			   const char* pMessage)
{
	RADV_FROM_HANDLE(radv_instance, instance, _instance);
	vk_debug_report(&instance->debug_report_callbacks, flags, objectType,
			object, location, messageCode, pLayerPrefix, pMessage);
}

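/* RADV only exposes single-device groups (hence the assert below), so the
 * "peer" is always the local device and every peer memory feature can be
 * advertised.
 */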
void
radv_GetDeviceGroupPeerMemoryFeatures(
	VkDevice device,
	uint32_t heapIndex,
	uint32_t localDeviceIndex,
	uint32_t remoteDeviceIndex,
	VkPeerMemoryFeatureFlags* pPeerMemoryFeatures)
{
	assert(localDeviceIndex == remoteDeviceIndex);

	*pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
			       VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
			       VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
			       VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
	VK_TIME_DOMAIN_DEVICE_EXT,
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
	VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(
	VkPhysicalDevice physicalDevice,
	uint32_t *pTimeDomainCount,
	VkTimeDomainEXT *pTimeDomains)
{
	int d;
	VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains,
			       pTimeDomainCount);

	for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
		vk_outarray_append_typed(VkTimeDomainEXT, &out, i) {
			*i = radv_time_domains[d];
		}
	}

	return vk_outarray_status(&out);
}

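/* Sample the given clock, falling back to CLOCK_MONOTONIC when the kernel
 * does not support CLOCK_MONOTONIC_RAW. Returns 0 on failure.
 */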
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
	struct timespec current;
	int ret;

	ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
	if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
		ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
	if (ret < 0)
		return 0;

	return (uint64_t) current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult radv_GetCalibratedTimestampsEXT(
	VkDevice _device,
	uint32_t timestampCount,
	const VkCalibratedTimestampInfoEXT *pTimestampInfos,
	uint64_t *pTimestamps,
	uint64_t *pMaxDeviation)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
	int d;
	uint64_t begin, end;
	uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
	begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
	begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

	for (d = 0; d < timestampCount; d++) {
		switch (pTimestampInfos[d].timeDomain) {
		case VK_TIME_DOMAIN_DEVICE_EXT:
			pTimestamps[d] = device->ws->query_value(device->ws,
								 RADEON_TIMESTAMP);
			/* clock_crystal_freq is in kHz, so this is the GPU
			 * counter period in nanoseconds. */
			uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
			max_clock_period = MAX2(max_clock_period, device_period);
			break;
		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
			pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
			max_clock_period = MAX2(max_clock_period, 1);
			break;

#ifdef CLOCK_MONOTONIC_RAW
		case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
			pTimestamps[d] = begin;
			break;
#endif
		default:
			pTimestamps[d] = 0;
			break;
		}
	}

#ifdef CLOCK_MONOTONIC_RAW
	end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
	end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

	/*
	 * The maximum deviation is the sum of the interval over which we
	 * perform the sampling and the maximum period of any sampled
	 * clock. That's because the maximum skew between any two sampled
	 * clock edges is when the sampled clock with the largest period is
	 * sampled at the end of that period but right at the beginning of the
	 * sampling interval and some other clock is sampled right at the
	 * beginning of its sampling period and right at the end of the
	 * sampling interval. Let's assume the GPU has the longest clock
	 * period and that the application is sampling GPU and monotonic:
	 *
	 *                               s                 e
	 *              w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
	 *    Raw       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
	 *
	 *                               g
	 *              0         1         2         3
	 *    GPU       -----_____-----_____-----_____-----_____
	 *
	 *                                            m
	 *              x y z 0 1 2 3 4 5 6 7 8 9 a b c
	 *    Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
	 *
	 *    Interval                  <----------------->
	 *    Deviation        <-------------------------->
	 *
	 *    s = read(raw)       2
	 *    g = read(GPU)       1
	 *    m = read(monotonic) 2
	 *    e = read(raw)       b
	 *
	 * We round the sample interval up by one tick to cover sampling error
	 * in the interval clock
	 */

	uint64_t sample_interval = end - begin + 1;

	*pMaxDeviation = sample_interval + max_clock_period;

	return VK_SUCCESS;
}

8132
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)8133 void radv_GetPhysicalDeviceMultisamplePropertiesEXT(
8134 VkPhysicalDevice physicalDevice,
8135 VkSampleCountFlagBits samples,
8136 VkMultisamplePropertiesEXT* pMultisampleProperties)
8137 {
8138 if (samples & (VK_SAMPLE_COUNT_2_BIT |
8139 VK_SAMPLE_COUNT_4_BIT |
8140 VK_SAMPLE_COUNT_8_BIT)) {
8141 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 2, 2 };
8142 } else {
8143 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
8144 }
8145 }
8146
VkResult radv_CreatePrivateDataSlotEXT(
	VkDevice _device,
	const VkPrivateDataSlotCreateInfoEXT* pCreateInfo,
	const VkAllocationCallbacks* pAllocator,
	VkPrivateDataSlotEXT* pPrivateDataSlot)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	return vk_private_data_slot_create(&device->vk, pCreateInfo, pAllocator,
					   pPrivateDataSlot);
}

void radv_DestroyPrivateDataSlotEXT(
	VkDevice _device,
	VkPrivateDataSlotEXT privateDataSlot,
	const VkAllocationCallbacks* pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	vk_private_data_slot_destroy(&device->vk, privateDataSlot, pAllocator);
}

VkResult radv_SetPrivateDataEXT(
	VkDevice _device,
	VkObjectType objectType,
	uint64_t objectHandle,
	VkPrivateDataSlotEXT privateDataSlot,
	uint64_t data)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	return vk_object_base_set_private_data(&device->vk, objectType,
					       objectHandle, privateDataSlot,
					       data);
}

void radv_GetPrivateDataEXT(
	VkDevice _device,
	VkObjectType objectType,
	uint64_t objectHandle,
	VkPrivateDataSlotEXT privateDataSlot,
	uint64_t* pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	vk_object_base_get_private_data(&device->vk, objectType, objectHandle,
					privateDataSlot, pData);
}