1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <string.h>
31
32 #ifdef __FreeBSD__
33 #include <sys/types.h>
34 #endif
35 #ifdef MAJOR_IN_MKDEV
36 #include <sys/mkdev.h>
37 #endif
38 #ifdef MAJOR_IN_SYSMACROS
39 #include <sys/sysmacros.h>
40 #endif
41
42 #ifdef __linux__
43 #include <sys/inotify.h>
44 #endif
45
46 #include "meta/radv_meta.h"
47 #include "util/disk_cache.h"
48 #include "util/u_debug.h"
49 #include "radv_cs.h"
50 #include "radv_debug.h"
51 #include "radv_private.h"
52 #include "radv_shader.h"
53 #include "vk_common_entrypoints.h"
54 #include "vk_pipeline_cache.h"
55 #include "vk_semaphore.h"
56 #include "vk_util.h"
57 #ifdef _WIN32
58 typedef void *drmDevicePtr;
59 #include <io.h>
60 #else
61 #include <amdgpu.h>
62 #include <xf86drm.h>
63 #include "drm-uapi/amdgpu_drm.h"
64 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
65 #endif
66 #include "util/build_id.h"
67 #include "util/driconf.h"
68 #include "util/mesa-sha1.h"
69 #include "util/os_time.h"
70 #include "util/timespec.h"
71 #include "util/u_atomic.h"
72 #include "util/u_process.h"
73 #include "vulkan/vk_icd.h"
74 #include "winsys/null/radv_null_winsys_public.h"
75 #include "git_sha1.h"
76 #include "sid.h"
77 #include "vk_common_entrypoints.h"
78 #include "vk_format.h"
79 #include "vk_sync.h"
80 #include "vk_sync_dummy.h"
81
82 #if LLVM_AVAILABLE
83 #include "ac_llvm_util.h"
84 #endif
85
86 static bool
87 radv_spm_trace_enabled(struct radv_instance *instance)
88 {
89 return (instance->vk.trace_mode & RADV_TRACE_MODE_RGP) &&
90 debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", true);
91 }
92
93 bool
94 radv_device_fault_detection_enabled(const struct radv_device *device)
95 {
96 return device->instance->debug_flags & RADV_DEBUG_HANG;
97 }
98
99 VKAPI_ATTR VkResult VKAPI_CALL
100 radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
101 const void *pHostPointer,
102 VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
103 {
104 RADV_FROM_HANDLE(radv_device, device, _device);
105
106 switch (handleType) {
107 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
108 const struct radv_physical_device *physical_device = device->physical_device;
109 uint32_t memoryTypeBits = 0;
110 for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
111 if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
112 !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
113 memoryTypeBits = (1 << i);
114 break;
115 }
116 }
117 pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
118 return VK_SUCCESS;
119 }
120 default:
121 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
122 }
123 }
124
125 static VkResult
126 radv_device_init_border_color(struct radv_device *device)
127 {
128 VkResult result;
129
130 result =
131 device->ws->buffer_create(device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
132 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
133 RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
134
135 if (result != VK_SUCCESS)
136 return vk_error(device, result);
137
138 radv_rmv_log_border_color_palette_create(device, device->border_color_data.bo);
139
140 result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
141 if (result != VK_SUCCESS)
142 return vk_error(device, result);
143
144 device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
145 if (!device->border_color_data.colors_gpu_ptr)
146 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
147 mtx_init(&device->border_color_data.mutex, mtx_plain);
148
149 return VK_SUCCESS;
150 }
151
152 static void
153 radv_device_finish_border_color(struct radv_device *device)
154 {
155 if (device->border_color_data.bo) {
156 radv_rmv_log_border_color_palette_destroy(device, device->border_color_data.bo);
157 device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
158 device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
159
160 mtx_destroy(&device->border_color_data.mutex);
161 }
162 }
163
164 static struct radv_shader_part *
165 _radv_create_vs_prolog(struct radv_device *device, const void *_key)
166 {
167 struct radv_vs_prolog_key *key = (struct radv_vs_prolog_key *)_key;
168 return radv_create_vs_prolog(device, key);
169 }
170
171 static uint32_t
172 radv_hash_vs_prolog(const void *key_)
173 {
174 const struct radv_vs_prolog_key *key = key_;
175 return _mesa_hash_data(key, sizeof(*key));
176 }
177
178 static bool
179 radv_cmp_vs_prolog(const void *a_, const void *b_)
180 {
181 const struct radv_vs_prolog_key *a = a_;
182 const struct radv_vs_prolog_key *b = b_;
183
184 return memcmp(a, b, sizeof(*a)) == 0;
185 }
186
187 static struct radv_shader_part_cache_ops vs_prolog_ops = {
188 .create = _radv_create_vs_prolog,
189 .hash = radv_hash_vs_prolog,
190 .equals = radv_cmp_vs_prolog,
191 };
192
193 static VkResult
194 radv_device_init_vs_prologs(struct radv_device *device)
195 {
196 if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
197 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
198
199 /* don't pre-compile prologs if we want to print them */
200 if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
201 return VK_SUCCESS;
202
203 struct radv_vs_prolog_key key;
204 memset(&key, 0, sizeof(key));
205 key.as_ls = false;
206 key.is_ngg = device->physical_device->use_ngg;
207 key.next_stage = MESA_SHADER_VERTEX;
208 key.wave32 = device->physical_device->ge_wave_size == 32;
209
210 for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
211 key.instance_rate_inputs = 0;
212 key.num_attributes = i;
213
214 device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
215 if (!device->simple_vs_prologs[i - 1])
216 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
217 }
218
219 unsigned idx = 0;
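/* Pre-compile a prolog for every contiguous range of instance-rate vertex attributes. */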
220 for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
221 for (unsigned count = 1; count <= num_attributes; count++) {
222 for (unsigned start = 0; start <= (num_attributes - count); start++) {
223 key.instance_rate_inputs = u_bit_consecutive(start, count);
224 key.num_attributes = num_attributes;
225
226 struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
227 if (!prolog)
228 return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
229
230 assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
231 device->instance_rate_vs_prologs[idx++] = prolog;
232 }
233 }
234 }
235 assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
236
237 return VK_SUCCESS;
238 }
239
240 static void
241 radv_device_finish_vs_prologs(struct radv_device *device)
242 {
243 if (device->vs_prologs.ops)
244 radv_shader_part_cache_finish(device, &device->vs_prologs);
245
246 for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++) {
247 if (!device->simple_vs_prologs[i])
248 continue;
249
250 radv_shader_part_unref(device, device->simple_vs_prologs[i]);
251 }
252
253 for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++) {
254 if (!device->instance_rate_vs_prologs[i])
255 continue;
256
257 radv_shader_part_unref(device, device->instance_rate_vs_prologs[i]);
258 }
259 }
260
261 static struct radv_shader_part *
262 _radv_create_ps_epilog(struct radv_device *device, const void *_key)
263 {
264 struct radv_ps_epilog_key *key = (struct radv_ps_epilog_key *)_key;
265 return radv_create_ps_epilog(device, key, NULL);
266 }
267
268 static uint32_t
269 radv_hash_ps_epilog(const void *key_)
270 {
271 const struct radv_ps_epilog_key *key = key_;
272 return _mesa_hash_data(key, sizeof(*key));
273 }
274
275 static bool
276 radv_cmp_ps_epilog(const void *a_, const void *b_)
277 {
278 const struct radv_ps_epilog_key *a = a_;
279 const struct radv_ps_epilog_key *b = b_;
280
281 return memcmp(a, b, sizeof(*a)) == 0;
282 }
283
284 static struct radv_shader_part_cache_ops ps_epilog_ops = {
285 .create = _radv_create_ps_epilog,
286 .hash = radv_hash_ps_epilog,
287 .equals = radv_cmp_ps_epilog,
288 };
289
290 static struct radv_shader_part *
291 _radv_create_tcs_epilog(struct radv_device *device, const void *_key)
292 {
293 struct radv_tcs_epilog_key *key = (struct radv_tcs_epilog_key *)_key;
294 return radv_create_tcs_epilog(device, key);
295 }
296
297 static uint32_t
298 radv_hash_tcs_epilog(const void *key_)
299 {
300 const struct radv_tcs_epilog_key *key = key_;
301 return _mesa_hash_data(key, sizeof(*key));
302 }
303
304 static bool
305 radv_cmp_tcs_epilog(const void *a_, const void *b_)
306 {
307 const struct radv_tcs_epilog_key *a = a_;
308 const struct radv_tcs_epilog_key *b = b_;
309
310 return memcmp(a, b, sizeof(*a)) == 0;
311 }
312
313 static struct radv_shader_part_cache_ops tcs_epilog_ops = {
314 .create = _radv_create_tcs_epilog,
315 .hash = radv_hash_tcs_epilog,
316 .equals = radv_cmp_tcs_epilog,
317 };
318
319 VkResult
320 radv_device_init_vrs_state(struct radv_device *device)
321 {
322 VkDeviceMemory mem;
323 VkBuffer buffer;
324 VkResult result;
325 VkImage image;
326
327 VkImageCreateInfo image_create_info = {
328 .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
329 .imageType = VK_IMAGE_TYPE_2D,
330 .format = VK_FORMAT_D16_UNORM,
331 .extent = {MAX_FRAMEBUFFER_WIDTH, MAX_FRAMEBUFFER_HEIGHT, 1},
332 .mipLevels = 1,
333 .arrayLayers = 1,
334 .samples = VK_SAMPLE_COUNT_1_BIT,
335 .tiling = VK_IMAGE_TILING_OPTIMAL,
336 .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
337 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
338 .queueFamilyIndexCount = 0,
339 .pQueueFamilyIndices = NULL,
340 .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
341 };
342
343 result =
344 radv_image_create(radv_device_to_handle(device), &(struct radv_image_create_info){.vk_info = &image_create_info},
345 &device->meta_state.alloc, &image, true);
346 if (result != VK_SUCCESS)
347 return result;
348
349 VkBufferCreateInfo buffer_create_info = {
350 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
351 .pNext =
352 &(VkBufferUsageFlags2CreateInfoKHR){
353 .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
354 .usage = VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT_KHR,
355 },
356 .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
357 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
358 };
359
360 result = radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true);
361 if (result != VK_SUCCESS)
362 goto fail_create;
363
364 VkBufferMemoryRequirementsInfo2 info = {
365 .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
366 .buffer = buffer,
367 };
368 VkMemoryRequirements2 mem_req = {
369 .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
370 };
371 vk_common_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
372
373 VkMemoryAllocateInfo alloc_info = {
374 .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
375 .allocationSize = mem_req.memoryRequirements.size,
376 };
377
378 result = radv_alloc_memory(device, &alloc_info, &device->meta_state.alloc, &mem, true);
379 if (result != VK_SUCCESS)
380 goto fail_alloc;
381
382 VkBindBufferMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
383 .buffer = buffer,
384 .memory = mem,
385 .memoryOffset = 0};
386
387 result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
388 if (result != VK_SUCCESS)
389 goto fail_bind;
390
391 device->vrs.image = radv_image_from_handle(image);
392 device->vrs.buffer = radv_buffer_from_handle(buffer);
393 device->vrs.mem = radv_device_memory_from_handle(mem);
394
395 return VK_SUCCESS;
396
397 fail_bind:
398 radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
399 fail_alloc:
400 radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
401 fail_create:
402 radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
403
404 return result;
405 }
406
407 static void
408 radv_device_finish_vrs_image(struct radv_device *device)
409 {
410 if (!device->vrs.image)
411 return;
412
413 radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
414 &device->meta_state.alloc);
415 radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
416 &device->meta_state.alloc);
417 radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), &device->meta_state.alloc);
418 }
419
420 static enum radv_force_vrs
421 radv_parse_vrs_rates(const char *str)
422 {
423 if (!strcmp(str, "2x2")) {
424 return RADV_FORCE_VRS_2x2;
425 } else if (!strcmp(str, "2x1")) {
426 return RADV_FORCE_VRS_2x1;
427 } else if (!strcmp(str, "1x2")) {
428 return RADV_FORCE_VRS_1x2;
429 } else if (!strcmp(str, "1x1")) {
430 return RADV_FORCE_VRS_1x1;
431 }
432
433 fprintf(stderr, "radv: Invalid VRS rates specified (valid values are 2x2, 2x1, 1x2 and 1x1)\n");
434 return RADV_FORCE_VRS_1x1;
435 }
436
437 static const char *
438 radv_get_force_vrs_config_file(void)
439 {
440 return getenv("RADV_FORCE_VRS_CONFIG_FILE");
441 }
442
443 static enum radv_force_vrs
444 radv_parse_force_vrs_config_file(const char *config_file)
445 {
446 enum radv_force_vrs force_vrs = RADV_FORCE_VRS_1x1;
447 char buf[4];
448 FILE *f;
449
450 f = fopen(config_file, "r");
451 if (!f) {
452 fprintf(stderr, "radv: Can't open file: '%s'.\n", config_file);
453 return force_vrs;
454 }
455
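/* The file is expected to hold a 3-character rate followed by a newline, e.g. "2x2". */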
456 if (fread(buf, sizeof(buf), 1, f) == 1) {
457 buf[3] = '\0';
458 force_vrs = radv_parse_vrs_rates(buf);
459 }
460
461 fclose(f);
462 return force_vrs;
463 }
464
465 #ifdef __linux__
466
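/* Enough space to read up to 10 inotify events with maximum-length file names at once. */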
467 #define BUF_LEN ((10 * (sizeof(struct inotify_event) + NAME_MAX + 1)))
468
469 static int
470 radv_notifier_thread_run(void *data)
471 {
472 struct radv_device *device = data;
473 struct radv_notifier *notifier = &device->notifier;
474 char buf[BUF_LEN];
475
476 while (!notifier->quit) {
477 const char *file = radv_get_force_vrs_config_file();
478 struct timespec tm = {.tv_nsec = 100000000}; /* 100ms */
479 int length, i = 0;
480
481 length = read(notifier->fd, buf, BUF_LEN);
482 while (i < length) {
483 struct inotify_event *event = (struct inotify_event *)&buf[i];
484
485 i += sizeof(struct inotify_event) + event->len;
486 if (event->mask & IN_MODIFY || event->mask & IN_DELETE_SELF) {
487 /* Sleep 100ms for editors that use a temporary file and delete the original. */
488 thrd_sleep(&tm, NULL);
489 device->force_vrs = radv_parse_force_vrs_config_file(file);
490
491 fprintf(stderr, "radv: Updated the per-vertex VRS rate to '%d'.\n", device->force_vrs);
492
493 if (event->mask & IN_DELETE_SELF) {
494 inotify_rm_watch(notifier->fd, notifier->watch);
495 notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
496 }
497 }
498 }
499
500 thrd_sleep(&tm, NULL);
501 }
502
503 return 0;
504 }
505
506 #endif
507
508 static int
509 radv_device_init_notifier(struct radv_device *device)
510 {
511 #ifndef __linux__
512 return true;
513 #else
514 struct radv_notifier *notifier = &device->notifier;
515 const char *file = radv_get_force_vrs_config_file();
516 int ret;
517
518 notifier->fd = inotify_init1(IN_NONBLOCK);
519 if (notifier->fd < 0)
520 return false;
521
522 notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
523 if (notifier->watch < 0)
524 goto fail_watch;
525
526 ret = thrd_create(&notifier->thread, radv_notifier_thread_run, device);
527 if (ret)
528 goto fail_thread;
529
530 return true;
531
532 fail_thread:
533 inotify_rm_watch(notifier->fd, notifier->watch);
534 fail_watch:
535 close(notifier->fd);
536
537 return false;
538 #endif
539 }
540
541 static void
542 radv_device_finish_notifier(struct radv_device *device)
543 {
544 #ifdef __linux__
545 struct radv_notifier *notifier = &device->notifier;
546
547 if (!notifier->thread)
548 return;
549
550 notifier->quit = true;
551 thrd_join(notifier->thread, NULL);
552 inotify_rm_watch(notifier->fd, notifier->watch);
553 close(notifier->fd);
554 #endif
555 }
556
557 static void
558 radv_device_finish_perf_counter_lock_cs(struct radv_device *device)
559 {
560 if (!device->perf_counter_lock_cs)
561 return;
562
563 for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
564 if (device->perf_counter_lock_cs[i])
565 device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
566 }
567
568 free(device->perf_counter_lock_cs);
569 }
570
571 struct dispatch_table_builder {
572 struct vk_device_dispatch_table *tables[RADV_DISPATCH_TABLE_COUNT];
573 bool used[RADV_DISPATCH_TABLE_COUNT];
574 bool initialized[RADV_DISPATCH_TABLE_COUNT];
575 };
576
577 static void
578 add_entrypoints(struct dispatch_table_builder *b, const struct vk_device_entrypoint_table *entrypoints,
579 enum radv_dispatch_table table)
580 {
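/* Propagate this layer's entrypoints down to the base device dispatch table and to any
 * layer tables that earlier calls marked as used. A table is fully initialized the first
 * time it is touched; after that, only entries it does not already have are filled, so
 * higher layers keep precedence over the common radv/wsi entrypoints added last.
 */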
581 for (int32_t i = table - 1; i >= RADV_DEVICE_DISPATCH_TABLE; i--) {
582 if (i == RADV_DEVICE_DISPATCH_TABLE || b->used[i]) {
583 vk_device_dispatch_table_from_entrypoints(b->tables[i], entrypoints, !b->initialized[i]);
584 b->initialized[i] = true;
585 }
586 }
587
588 if (table < RADV_DISPATCH_TABLE_COUNT)
589 b->used[table] = true;
590 }
591
592 static void
593 init_dispatch_tables(struct radv_device *device, struct radv_physical_device *physical_device)
594 {
595 struct dispatch_table_builder b = {0};
596 b.tables[RADV_DEVICE_DISPATCH_TABLE] = &device->vk.dispatch_table;
597 b.tables[RADV_APP_DISPATCH_TABLE] = &device->layer_dispatch.app;
598 b.tables[RADV_RGP_DISPATCH_TABLE] = &device->layer_dispatch.rgp;
599 b.tables[RADV_RRA_DISPATCH_TABLE] = &device->layer_dispatch.rra;
600 b.tables[RADV_RMV_DISPATCH_TABLE] = &device->layer_dispatch.rmv;
601 b.tables[RADV_CTX_ROLL_DISPATCH_TABLE] = &device->layer_dispatch.ctx_roll;
602
603 if (!strcmp(physical_device->instance->drirc.app_layer, "metroexodus")) {
604 add_entrypoints(&b, &metro_exodus_device_entrypoints, RADV_APP_DISPATCH_TABLE);
605 } else if (!strcmp(physical_device->instance->drirc.app_layer, "rage2")) {
606 add_entrypoints(&b, &rage2_device_entrypoints, RADV_APP_DISPATCH_TABLE);
607 } else if (!strcmp(physical_device->instance->drirc.app_layer, "quanticdream")) {
608 add_entrypoints(&b, &quantic_dream_device_entrypoints, RADV_APP_DISPATCH_TABLE);
609 }
610
611 if (physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
612 add_entrypoints(&b, &sqtt_device_entrypoints, RADV_RGP_DISPATCH_TABLE);
613
614 if ((physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(physical_device, false))
615 add_entrypoints(&b, &rra_device_entrypoints, RADV_RRA_DISPATCH_TABLE);
616
617 #ifndef _WIN32
618 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
619 add_entrypoints(&b, &rmv_device_entrypoints, RADV_RMV_DISPATCH_TABLE);
620 #endif
621
622 if (physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_CTX_ROLLS)
623 add_entrypoints(&b, &ctx_roll_device_entrypoints, RADV_CTX_ROLL_DISPATCH_TABLE);
624
625 add_entrypoints(&b, &radv_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
626 add_entrypoints(&b, &wsi_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
627 add_entrypoints(&b, &vk_common_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
628 }
629
630 static VkResult
631 capture_trace(VkQueue _queue)
632 {
633 RADV_FROM_HANDLE(radv_queue, queue, _queue);
634
635 VkResult result = VK_SUCCESS;
636
637 if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA)
638 queue->device->rra_trace.triggered = true;
639
640 if (queue->device->vk.memory_trace_data.is_enabled) {
641 simple_mtx_lock(&queue->device->vk.memory_trace_data.token_mtx);
642 radv_rmv_collect_trace_events(queue->device);
643 vk_dump_rmv_capture(&queue->device->vk.memory_trace_data);
644 simple_mtx_unlock(&queue->device->vk.memory_trace_data.token_mtx);
645 }
646
647 if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
648 queue->device->sqtt_triggered = true;
649
650 if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_CTX_ROLLS) {
651 char filename[2048];
652 time_t t = time(NULL);
653 struct tm now = *localtime(&t);
654 snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.ctxroll", util_get_process_name(),
655 1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
656
657 simple_mtx_lock(&queue->device->ctx_roll_mtx);
658
659 queue->device->ctx_roll_file = fopen(filename, "w");
660 if (queue->device->ctx_roll_file)
661 fprintf(stderr, "radv: Writing context rolls to '%s'...\n", filename);
662
663 simple_mtx_unlock(&queue->device->ctx_roll_mtx);
664 }
665
666 return result;
667 }
668
669 static void
670 radv_device_init_cache_key(struct radv_device *device)
671 {
672 struct radv_device_cache_key *key = &device->cache_key;
673
674 key->disable_trunc_coord = device->disable_trunc_coord;
675 key->image_2d_view_of_3d =
676 device->vk.enabled_features.image2DViewOf3D && device->physical_device->rad_info.gfx_level == GFX9;
677 key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries;
678 key->primitives_generated_query = radv_uses_primitives_generated_query(device);
679
680 /* The Vulkan spec says:
681 * "Binary shaders retrieved from a physical device with a certain shaderBinaryUUID are
682 * guaranteed to be compatible with all other physical devices reporting the same
683 * shaderBinaryUUID and the same or higher shaderBinaryVersion."
684 *
685 * That means the driver should compile shaders for the "worst" case of all features being
686 * enabled, regardless of what features are actually enabled on the logical device.
687 */
688 if (device->vk.enabled_features.shaderObject) {
689 key->image_2d_view_of_3d = device->physical_device->rad_info.gfx_level == GFX9;
690 key->primitives_generated_query = true;
691 }
692
693 _mesa_blake3_compute(key, sizeof(*key), device->cache_hash);
694 }
695
696 VKAPI_ATTR VkResult VKAPI_CALL
697 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
698 const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
699 {
700 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
701 VkResult result;
702 struct radv_device *device;
703
704 bool keep_shader_info = false;
705 bool overallocation_disallowed = false;
706
707 vk_foreach_struct_const (ext, pCreateInfo->pNext) {
708 switch (ext->sType) {
709 case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
710 const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
711 if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
712 overallocation_disallowed = true;
713 break;
714 }
715 default:
716 break;
717 }
718 }
719
720 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
721 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
722 if (!device)
723 return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
724
725 result = vk_device_init(&device->vk, &physical_device->vk, NULL, pCreateInfo, pAllocator);
726 if (result != VK_SUCCESS) {
727 vk_free(&device->vk.alloc, device);
728 return result;
729 }
730
731 init_dispatch_tables(device, physical_device);
732
733 device->vk.capture_trace = capture_trace;
734
735 device->vk.command_buffer_ops = &radv_cmd_buffer_ops;
736
737 device->instance = physical_device->instance;
738 device->physical_device = physical_device;
739 simple_mtx_init(&device->ctx_roll_mtx, mtx_plain);
740 simple_mtx_init(&device->trace_mtx, mtx_plain);
741 simple_mtx_init(&device->pstate_mtx, mtx_plain);
742 simple_mtx_init(&device->rt_handles_mtx, mtx_plain);
743
744 device->rt_handles = _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);
745
746 device->ws = physical_device->ws;
747 vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
748
749 /* With update after bind we can't attach bo's to the command buffer
750 * from the descriptor set anymore, so we have to use a global BO list.
751 */
752 device->use_global_bo_list =
753 (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || device->vk.enabled_features.bufferDeviceAddress ||
754 device->vk.enabled_features.descriptorIndexing || device->vk.enabled_extensions.EXT_descriptor_indexing ||
755 device->vk.enabled_extensions.EXT_buffer_device_address ||
756 device->vk.enabled_extensions.KHR_buffer_device_address ||
757 device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
758 device->vk.enabled_extensions.KHR_acceleration_structure ||
759 device->vk.enabled_extensions.VALVE_descriptor_set_host_mapping;
760
761 device->buffer_robustness = device->vk.enabled_features.robustBufferAccess2 ? RADV_BUFFER_ROBUSTNESS_2
762 : device->vk.enabled_features.robustBufferAccess ? RADV_BUFFER_ROBUSTNESS_1
763 : RADV_BUFFER_ROBUSTNESS_DISABLED;
764
765 radv_init_shader_arenas(device);
766
767 device->overallocation_disallowed = overallocation_disallowed;
768 mtx_init(&device->overallocation_mutex, mtx_plain);
769
770 if (physical_device->rad_info.register_shadowing_required || device->instance->debug_flags & RADV_DEBUG_SHADOW_REGS)
771 device->uses_shadow_regs = true;
772
773 /* Create one context per queue priority. */
774 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
775 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
776 const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority =
777 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
778 enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
779
780 if (device->hw_ctx[priority])
781 continue;
782
783 result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
784 if (result != VK_SUCCESS)
785 goto fail_queue;
786 }
787
788 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
789 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
790 uint32_t qfi = queue_create->queueFamilyIndex;
791 const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority =
792 vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
793
794 device->queues[qfi] = vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
795 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
796 if (!device->queues[qfi]) {
797 result = VK_ERROR_OUT_OF_HOST_MEMORY;
798 goto fail_queue;
799 }
800
801 memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
802
803 device->queue_count[qfi] = queue_create->queueCount;
804
805 for (unsigned q = 0; q < queue_create->queueCount; q++) {
806 result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
807 if (result != VK_SUCCESS)
808 goto fail_queue;
809 }
810 }
811 device->private_sdma_queue = VK_NULL_HANDLE;
812
813 device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
814 /* SDMA buffer copy is only implemented for GFX7+. */
815 device->physical_device->rad_info.gfx_level >= GFX7;
816 result = radv_init_shader_upload_queue(device);
817 if (result != VK_SUCCESS)
818 goto fail;
819
820 device->pbb_allowed =
821 device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
822
823 device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord;
824
825 if (device->instance->vk.app_info.engine_name && !strcmp(device->instance->vk.app_info.engine_name, "DXVK")) {
826 /* For DXVK 2.3.0 and older, use dualSrcBlend to determine if this is D3D9. */
827 bool is_d3d9 = !device->vk.enabled_features.dualSrcBlend;
828 if (device->instance->vk.app_info.engine_version > VK_MAKE_VERSION(2, 3, 0))
829 is_d3d9 = device->instance->vk.app_info.app_version & 0x1;
830
831 device->disable_trunc_coord &= !is_d3d9;
832 }
833
834 /* The maximum number of scratch waves. Scratch space isn't divided
835 * evenly between CUs. The number is only a function of the number of CUs.
836 * We can decrease the constant to decrease the scratch buffer size.
837 *
838 * sctx->scratch_waves must be >= the maximum possible size of
839 * 1 threadgroup, so that the hw doesn't hang from being unable
840 * to start any.
841 *
842 * The recommended value is 4 per CU at most. Higher numbers don't
843 * bring much benefit, but they still occupy chip resources (think
844 * async compute). I've seen ~2% performance difference between 4 and 32.
845 */
846 uint32_t max_threads_per_block = 2048;
847 device->scratch_waves = MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64);
848
849 device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
850
851 if (device->physical_device->rad_info.gfx_level >= GFX7) {
852 /* If the KMD allows it (there is a KMD hw register for it),
853 * allow launching waves out-of-order.
854 */
855 device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
856 }
857 if (device->physical_device->rad_info.gfx_level >= GFX10) {
858 /* Enable asynchronous compute tunneling. The KMD restricts this feature
859 * to high-priority compute queues, so setting the bit on any other queue
860 * is a no-op. PAL always sets this bit as well.
861 */
862 device->dispatch_initiator |= S_00B800_TUNNEL_ENABLE(1);
863 }
864
865 /* Disable partial preemption for task shaders.
866 * The kernel may not support preemption, but PAL always sets this bit,
867 * so let's also set it here for consistency.
868 */
869 device->dispatch_initiator_task = device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1);
870
871 if (radv_device_fault_detection_enabled(device)) {
872 /* Enable GPU hangs detection and dump logs if a GPU hang is
873 * detected.
874 */
875 keep_shader_info = true;
876
877 if (!radv_init_trace(device)) {
878 result = VK_ERROR_INITIALIZATION_FAILED;
879 goto fail;
880 }
881
882 fprintf(stderr, "*****************************************************************************\n");
883 fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
884 fprintf(stderr, "*****************************************************************************\n");
885
886 /* Wait for idle after every draw/dispatch to identify the
887 * first bad call.
888 */
889 device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
890
891 radv_dump_enabled_options(device, stderr);
892 }
893
894 if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) {
895 if (device->physical_device->rad_info.gfx_level < GFX8 || device->physical_device->rad_info.gfx_level > GFX11) {
896 fprintf(stderr, "GPU hardware not supported: refer to "
897 "the RGP documentation for the list of "
898 "supported GPUs!\n");
899 abort();
900 }
901
902 if (!radv_sqtt_init(device)) {
903 result = VK_ERROR_INITIALIZATION_FAILED;
904 goto fail;
905 }
906
907 fprintf(stderr,
908 "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
909 "instruction timing: %s, cache counters: %s, queue events: %s).\n",
910 device->sqtt.buffer_size / (1024 * 1024), radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
911 radv_spm_trace_enabled(device->instance) ? "enabled" : "disabled",
912 radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");
913
914 if (radv_spm_trace_enabled(device->instance)) {
915 if (device->physical_device->rad_info.gfx_level >= GFX10) {
916 if (!radv_spm_init(device)) {
917 result = VK_ERROR_INITIALIZATION_FAILED;
918 goto fail;
919 }
920 } else {
921 fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name);
922 }
923 }
924 }
925
926 #ifndef _WIN32
927 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
928 struct vk_rmv_device_info info;
929 memset(&info, 0, sizeof(struct vk_rmv_device_info));
930 radv_rmv_fill_device_info(physical_device, &info);
931 vk_memory_trace_init(&device->vk, &info);
932 radv_memory_trace_init(device);
933 }
934 #endif
935
936 if (getenv("RADV_TRAP_HANDLER")) {
937 /* TODO: Add support for more hardware. */
938 assert(device->physical_device->rad_info.gfx_level == GFX8);
939
940 fprintf(stderr, "**********************************************************************\n");
941 fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
942 fprintf(stderr, "**********************************************************************\n");
943
944 /* To get the disassembly of the faulty shaders, we have to
945 * keep some shader info around.
946 */
947 keep_shader_info = true;
948
949 if (!radv_trap_handler_init(device)) {
950 result = VK_ERROR_INITIALIZATION_FAILED;
951 goto fail;
952 }
953 }
954
955 if (device->physical_device->rad_info.gfx_level == GFX10_3) {
956 if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
957 const char *file = radv_get_force_vrs_config_file();
958
959 device->force_vrs = radv_parse_force_vrs_config_file(file);
960
961 if (radv_device_init_notifier(device)) {
962 device->force_vrs_enabled = true;
963 } else {
964 fprintf(stderr, "radv: Failed to initialize the notifier for RADV_FORCE_VRS_CONFIG_FILE!\n");
965 }
966 } else if (getenv("RADV_FORCE_VRS")) {
967 const char *vrs_rates = getenv("RADV_FORCE_VRS");
968
969 device->force_vrs = radv_parse_vrs_rates(vrs_rates);
970 device->force_vrs_enabled = device->force_vrs != RADV_FORCE_VRS_1x1;
971 }
972 }
973
974 /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
975 device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3;
976
977 device->keep_shader_info = keep_shader_info;
978
979 /* Initialize the per-device cache key before compiling meta shaders. */
980 radv_device_init_cache_key(device);
981
982 result = radv_device_init_meta(device);
983 if (result != VK_SUCCESS)
984 goto fail;
985
986 radv_device_init_msaa(device);
987
988 /* If the border color extension is enabled, let's create the buffer we need. */
989 if (device->vk.enabled_features.customBorderColors) {
990 result = radv_device_init_border_color(device);
991 if (result != VK_SUCCESS)
992 goto fail;
993 }
994
995 if (device->vk.enabled_features.vertexInputDynamicState || device->vk.enabled_features.graphicsPipelineLibrary ||
996 device->vk.enabled_features.shaderObject) {
997 result = radv_device_init_vs_prologs(device);
998 if (result != VK_SUCCESS)
999 goto fail;
1000 }
1001
1002 if (device->vk.enabled_features.shaderObject) {
1003 if (!radv_shader_part_cache_init(&device->tcs_epilogs, &tcs_epilog_ops)) {
1004 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1005 goto fail;
1006 }
1007 }
1008
1009 if (device->vk.enabled_features.graphicsPipelineLibrary || device->vk.enabled_features.shaderObject ||
1010 device->vk.enabled_features.extendedDynamicState3ColorBlendEnable ||
1011 device->vk.enabled_features.extendedDynamicState3ColorWriteMask ||
1012 device->vk.enabled_features.extendedDynamicState3AlphaToCoverageEnable ||
1013 device->vk.enabled_features.extendedDynamicState3ColorBlendEquation) {
1014 if (!radv_shader_part_cache_init(&device->ps_epilogs, &ps_epilog_ops)) {
1015 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1016 goto fail;
1017 }
1018 }
1019
1020 if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS))
1021 radv_create_gfx_config(device);
1022
1023 struct vk_pipeline_cache_create_info info = {.weak_ref = true};
1024 device->mem_cache = vk_pipeline_cache_create(&device->vk, &info, NULL);
1025 if (!device->mem_cache) {
1026 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1027 goto fail_meta;
1028 }
1029
1030 device->force_aniso = MIN2(16, (int)debug_get_num_option("RADV_TEX_ANISO", -1));
1031 if (device->force_aniso >= 0) {
1032 fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso));
1033 }
1034
1035 if (device->vk.enabled_features.performanceCounterQueryPools) {
1036 size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
1037 result = device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
1038 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
1039 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
1040 if (result != VK_SUCCESS)
1041 goto fail_cache;
1042
1043 device->perf_counter_lock_cs = calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES);
1044 if (!device->perf_counter_lock_cs) {
1045 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1046 goto fail_cache;
1047 }
1048
1049 if (!device->physical_device->ac_perfcounters.blocks) {
1050 result = VK_ERROR_INITIALIZATION_FAILED;
1051 goto fail_cache;
1052 }
1053 }
1054
1055 if ((device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(physical_device, false)) {
1056 result = radv_rra_trace_init(device);
1057 if (result != VK_SUCCESS)
1058 goto fail;
1059 }
1060
1061 if (device->vk.enabled_features.rayTracingPipelineShaderGroupHandleCaptureReplay) {
1062 device->capture_replay_arena_vas = _mesa_hash_table_u64_create(NULL);
1063 }
1064
1065 result = radv_printf_data_init(device);
1066 if (result != VK_SUCCESS)
1067 goto fail_cache;
1068
1069 *pDevice = radv_device_to_handle(device);
1070 return VK_SUCCESS;
1071
1072 fail_cache:
1073 vk_pipeline_cache_destroy(device->mem_cache, NULL);
1074 fail_meta:
1075 radv_device_finish_meta(device);
1076 fail:
1077 radv_printf_data_finish(device);
1078
1079 radv_sqtt_finish(device);
1080
1081 radv_rra_trace_finish(radv_device_to_handle(device), &device->rra_trace);
1082
1083 radv_spm_finish(device);
1084
1085 radv_trap_handler_finish(device);
1086 radv_finish_trace(device);
1087
1088 radv_device_finish_perf_counter_lock_cs(device);
1089 if (device->perf_counter_bo)
1090 device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
1091 if (device->gfx_init)
1092 device->ws->buffer_destroy(device->ws, device->gfx_init);
1093
1094 radv_device_finish_notifier(device);
1095 radv_device_finish_vs_prologs(device);
1096 if (device->tcs_epilogs.ops)
1097 radv_shader_part_cache_finish(device, &device->tcs_epilogs);
1098 if (device->ps_epilogs.ops)
1099 radv_shader_part_cache_finish(device, &device->ps_epilogs);
1100 radv_device_finish_border_color(device);
1101
1102 radv_destroy_shader_upload_queue(device);
1103
1104 fail_queue:
1105 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1106 for (unsigned q = 0; q < device->queue_count[i]; q++)
1107 radv_queue_finish(&device->queues[i][q]);
1108 if (device->queue_count[i])
1109 vk_free(&device->vk.alloc, device->queues[i]);
1110 }
1111
1112 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
1113 if (device->hw_ctx[i])
1114 device->ws->ctx_destroy(device->hw_ctx[i]);
1115 }
1116
1117 radv_destroy_shader_arenas(device);
1118
1119 _mesa_hash_table_destroy(device->rt_handles, NULL);
1120
1121 simple_mtx_destroy(&device->ctx_roll_mtx);
1122 simple_mtx_destroy(&device->pstate_mtx);
1123 simple_mtx_destroy(&device->trace_mtx);
1124 simple_mtx_destroy(&device->rt_handles_mtx);
1125 mtx_destroy(&device->overallocation_mutex);
1126
1127 vk_device_finish(&device->vk);
1128 vk_free(&device->vk.alloc, device);
1129 return result;
1130 }
1131
1132 VKAPI_ATTR void VKAPI_CALL
1133 radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
1134 {
1135 RADV_FROM_HANDLE(radv_device, device, _device);
1136
1137 if (!device)
1138 return;
1139
1140 if (device->capture_replay_arena_vas)
1141 _mesa_hash_table_u64_destroy(device->capture_replay_arena_vas);
1142
1143 radv_device_finish_perf_counter_lock_cs(device);
1144 if (device->perf_counter_bo)
1145 device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
1146
1147 if (device->gfx_init)
1148 device->ws->buffer_destroy(device->ws, device->gfx_init);
1149
1150 radv_device_finish_notifier(device);
1151 radv_device_finish_vs_prologs(device);
1152 if (device->tcs_epilogs.ops)
1153 radv_shader_part_cache_finish(device, &device->tcs_epilogs);
1154 if (device->ps_epilogs.ops)
1155 radv_shader_part_cache_finish(device, &device->ps_epilogs);
1156 radv_device_finish_border_color(device);
1157 radv_device_finish_vrs_image(device);
1158
1159 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1160 for (unsigned q = 0; q < device->queue_count[i]; q++)
1161 radv_queue_finish(&device->queues[i][q]);
1162 if (device->queue_count[i])
1163 vk_free(&device->vk.alloc, device->queues[i]);
1164 }
1165 if (device->private_sdma_queue != VK_NULL_HANDLE) {
1166 radv_queue_finish(device->private_sdma_queue);
1167 vk_free(&device->vk.alloc, device->private_sdma_queue);
1168 }
1169
1170 _mesa_hash_table_destroy(device->rt_handles, NULL);
1171
1172 radv_device_finish_meta(device);
1173
1174 vk_pipeline_cache_destroy(device->mem_cache, NULL);
1175
1176 radv_destroy_shader_upload_queue(device);
1177
1178 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
1179 if (device->hw_ctx[i])
1180 device->ws->ctx_destroy(device->hw_ctx[i]);
1181 }
1182
1183 mtx_destroy(&device->overallocation_mutex);
1184 simple_mtx_destroy(&device->ctx_roll_mtx);
1185 simple_mtx_destroy(&device->pstate_mtx);
1186 simple_mtx_destroy(&device->trace_mtx);
1187 simple_mtx_destroy(&device->rt_handles_mtx);
1188
1189 radv_trap_handler_finish(device);
1190 radv_finish_trace(device);
1191
1192 radv_destroy_shader_arenas(device);
1193
1194 radv_printf_data_finish(device);
1195
1196 radv_sqtt_finish(device);
1197
1198 radv_rra_trace_finish(_device, &device->rra_trace);
1199
1200 radv_memory_trace_finish(device);
1201
1202 radv_spm_finish(device);
1203
1204 ralloc_free(device->gpu_hang_report);
1205
1206 vk_device_finish(&device->vk);
1207 vk_free(&device->vk.alloc, device);
1208 }
1209
1210 bool
1211 radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
1212 {
1213 /* Set BO metadata for dedicated image allocations. We don't need it for import when the image
1214 * tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, but we set it anyway for foreign consumers.
1215 */
1216 if (memory->image) {
1217 struct radeon_bo_metadata metadata;
1218
1219 assert(memory->image->bindings[0].offset == 0);
1220 radv_init_metadata(device, memory->image, &metadata);
1221 device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
1222 }
1223
1224 return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
1225 }
1226
1227 VKAPI_ATTR void VKAPI_CALL
1228 radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
1229 VkMemoryRequirements2 *pMemoryRequirements)
1230 {
1231 RADV_FROM_HANDLE(radv_device, device, _device);
1232 RADV_FROM_HANDLE(radv_image, image, pInfo->image);
1233
1234 pMemoryRequirements->memoryRequirements.memoryTypeBits =
1235 ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
1236 ~device->physical_device->memory_types_32bit;
1237
1238 pMemoryRequirements->memoryRequirements.size = image->size;
1239 pMemoryRequirements->memoryRequirements.alignment = image->alignment;
1240
1241 vk_foreach_struct (ext, pMemoryRequirements->pNext) {
1242 switch (ext->sType) {
1243 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
1244 VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
1245 req->requiresDedicatedAllocation = image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
1246 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
1247 break;
1248 }
1249 default:
1250 break;
1251 }
1252 }
1253 }
1254
1255 VKAPI_ATTR void VKAPI_CALL
1256 radv_GetDeviceImageMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo,
1257 VkMemoryRequirements2 *pMemoryRequirements)
1258 {
1259 UNUSED VkResult result;
1260 VkImage image;
1261
1262 /* Determining the image size/alignment requires creating a surface, which is complicated without
1263 * creating an image.
1264 * TODO: Avoid creating an image.
1265 */
1266 result =
1267 radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
1268 assert(result == VK_SUCCESS);
1269
1270 VkImageMemoryRequirementsInfo2 info2 = {
1271 .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1272 .image = image,
1273 };
1274
1275 radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);
1276
1277 radv_DestroyImage(device, image, NULL);
1278 }
1279
1280 static uint32_t
1281 radv_surface_max_layer_count(struct radv_image_view *iview)
1282 {
1283 return iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
1284 : (iview->vk.base_array_layer + iview->vk.layer_count);
1285 }
1286
1287 unsigned
1288 radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image)
1289 {
1290 if (device->physical_device->rad_info.gfx_level < GFX10 && image->vk.samples > 1) {
1291 if (image->planes[0].surface.bpe == 1)
1292 return V_028C78_MAX_BLOCK_SIZE_64B;
1293 else if (image->planes[0].surface.bpe == 2)
1294 return V_028C78_MAX_BLOCK_SIZE_128B;
1295 }
1296
1297 return V_028C78_MAX_BLOCK_SIZE_256B;
1298 }
1299
1300 static unsigned
1301 get_dcc_min_compressed_block_size(const struct radv_device *device)
1302 {
1303 if (!device->physical_device->rad_info.has_dedicated_vram) {
1304 /* amdvlk: [min-compressed-block-size] should be set to 32 for
1305 * dGPU and 64 for APU because all of our APUs to date use
1306 * DIMMs which have a request granularity size of 64B while all
1307 * other chips have a 32B request size.
1308 */
1309 return V_028C78_MIN_BLOCK_SIZE_64B;
1310 }
1311
1312 return V_028C78_MIN_BLOCK_SIZE_32B;
1313 }
1314
1315 static uint32_t
1316 radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
1317 {
1318 unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image);
1319 unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
1320 unsigned max_compressed_block_size;
1321 unsigned independent_128b_blocks;
1322 unsigned independent_64b_blocks;
1323
1324 if (!radv_dcc_enabled(iview->image, iview->vk.base_mip_level))
1325 return 0;
1326
1327 /* For GFX9+ ac_surface computes values for us (except min_compressed
1328 * and max_uncompressed) */
1329 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1330 max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
1331 independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
1332 independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
1333 } else {
1334 independent_128b_blocks = 0;
1335
1336 if (iview->image->vk.usage &
1337 (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
1338 /* If this DCC image is potentially going to be used in texture
1339 * fetches, we need some special settings.
1340 */
1341 independent_64b_blocks = 1;
1342 max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
1343 } else {
1344 /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
1345 * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
1346 * big as possible for better compression state.
1347 */
1348 independent_64b_blocks = 0;
1349 max_compressed_block_size = max_uncompressed_block_size;
1350 }
1351 }
1352
1353 uint32_t result = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1354 S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
1355 S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
1356 S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
1357
1358 if (device->physical_device->rad_info.gfx_level >= GFX11) {
1359 result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
1360 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
1361 S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
1362
1363 if (device->physical_device->rad_info.family >= CHIP_GFX1103_R2) {
1364 result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4);
1365 }
1366 } else {
1367 result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(independent_128b_blocks);
1368 }
1369
1370 return result;
1371 }
1372
1373 void
1374 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1375 struct radv_image_view *iview)
1376 {
1377 const struct util_format_description *desc;
1378 unsigned ntype, format, swap, endian;
1379 unsigned blend_clamp = 0, blend_bypass = 0;
1380 uint64_t va;
1381 const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
1382 const struct radeon_surf *surf = &plane->surface;
1383 uint8_t tile_swizzle = plane->surface.tile_swizzle;
1384
1385 desc = vk_format_description(iview->vk.format);
1386
1387 memset(cb, 0, sizeof(*cb));
1388
1389 /* Intensity is implemented as Red, so treat it that way. */
1390 if (device->physical_device->rad_info.gfx_level >= GFX11)
1391 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
1392 else
1393 cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
1394
1395 uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
1396 va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) + iview->image->bindings[plane_id].offset;
1397
1398 if (iview->nbc_view.valid) {
1399 va += iview->nbc_view.base_address_offset;
1400 tile_swizzle = iview->nbc_view.tile_swizzle;
1401 }
1402
1403 cb->cb_color_base = va >> 8;
1404
1405 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1406 if (device->physical_device->rad_info.gfx_level >= GFX11) {
1407 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1408 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1409 } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
1410 cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1411 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1412 S_028EE0_CMASK_PIPE_ALIGNED(1) |
1413 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1414 } else {
1415 struct gfx9_surf_meta_flags meta = {
1416 .rb_aligned = 1,
1417 .pipe_aligned = 1,
1418 };
1419
1420 if (surf->meta_offset)
1421 meta = surf->u.gfx9.color.dcc;
1422
1423 cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1424 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1425 S_028C74_RB_ALIGNED(meta.rb_aligned) | S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
1426 cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
1427 }
1428
1429 cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
1430 cb->cb_color_base |= tile_swizzle;
1431 } else {
1432 const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->vk.base_mip_level];
1433 unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
1434
1435 cb->cb_color_base += level_info->offset_256B;
1436 if (level_info->mode == RADEON_SURF_MODE_2D)
1437 cb->cb_color_base |= tile_swizzle;
1438
1439 pitch_tile_max = level_info->nblk_x / 8 - 1;
1440 slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1441 tile_mode_index = radv_tile_mode_index(plane, iview->vk.base_mip_level, false);
1442
1443 cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1444 cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1445 cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
1446
1447 cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
1448
1449 if (radv_image_has_fmask(iview->image)) {
1450 if (device->physical_device->rad_info.gfx_level >= GFX7)
1451 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
1452 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
1453 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
1454 } else {
1455 /* This must be set for fast clear to work without FMASK. */
1456 if (device->physical_device->rad_info.gfx_level >= GFX7)
1457 cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1458 cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1459 cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1460 }
1461 }
1462
1463 /* CMASK variables */
1464 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1465 va += surf->cmask_offset;
1466 cb->cb_color_cmask = va >> 8;
1467
1468 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1469 va += surf->meta_offset;
1470
1471 if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->rad_info.gfx_level <= GFX8)
1472 va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
1473
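/* Fold the tile swizzle into CB_DCC_BASE, keeping only the bits that stay
 * below the DCC metadata alignment; the alignment mask is shifted down by 8
 * to match the 256-byte units used by the register.
 */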
1474 unsigned dcc_tile_swizzle = tile_swizzle;
1475 dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
1476
1477 cb->cb_dcc_base = va >> 8;
1478 cb->cb_dcc_base |= dcc_tile_swizzle;
1479
1480 /* GFX10 field has the same base shift as the GFX6 field. */
1481 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
1482 uint32_t slice_start = iview->nbc_view.valid ? 0 : iview->vk.base_array_layer;
1483 cb->cb_color_view = S_028C6C_SLICE_START(slice_start) | S_028C6C_SLICE_MAX_GFX10(max_slice);
1484
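/* NUM_SAMPLES and NUM_FRAGMENTS are programmed as log2 of the respective
 * counts; GFX11 only keeps the fragment count in CB_COLOR_ATTRIB.
 */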
1485 if (iview->image->vk.samples > 1) {
1486 unsigned log_samples = util_logbase2(iview->image->vk.samples);
1487
1488 if (device->physical_device->rad_info.gfx_level >= GFX11)
1489 cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
1490 else
1491 cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
1492 }
1493
1494 if (radv_image_has_fmask(iview->image)) {
1495 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->fmask_offset;
1496 cb->cb_color_fmask = va >> 8;
1497 cb->cb_color_fmask |= surf->fmask_tile_swizzle;
1498 } else {
1499 cb->cb_color_fmask = cb->cb_color_base;
1500 }
1501
1502 ntype = ac_get_cb_number_type(desc->format);
1503 format = ac_get_cb_format(device->physical_device->rad_info.gfx_level, desc->format);
1504 assert(format != V_028C70_COLOR_INVALID);
1505
1506 swap = radv_translate_colorswap(iview->vk.format, false);
1507 endian = radv_colorformat_endian_swap(format);
1508
1509 /* blend clamp should be set for all NORM/SRGB types */
1510 if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || ntype == V_028C70_NUMBER_SRGB)
1511 blend_clamp = 1;
1512
1513 /* set blend bypass according to docs if SINT/UINT or
1514 8/24 COLOR variants */
1515 if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || format == V_028C70_COLOR_8_24 ||
1516 format == V_028C70_COLOR_24_8 || format == V_028C70_COLOR_X24_8_32_FLOAT) {
1517 blend_clamp = 0;
1518 blend_bypass = 1;
1519 }
1520 #if 0
1521 if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
1522 (format == V_028C70_COLOR_8 ||
1523 format == V_028C70_COLOR_8_8 ||
1524 format == V_028C70_COLOR_8_8_8_8))
1525 ->color_is_int8 = true;
1526 #endif
1527 cb->cb_color_info = S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
1528 S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
1529 S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
1530 ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
1531 format != V_028C70_COLOR_24_8) |
1532 S_028C70_NUMBER_TYPE(ntype);
1533
1534 if (device->physical_device->rad_info.gfx_level >= GFX11)
1535 cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
1536 else
1537 cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
1538
1539 if (radv_image_has_fmask(iview->image)) {
1540 cb->cb_color_info |= S_028C70_COMPRESSION(1);
1541 if (device->physical_device->rad_info.gfx_level == GFX6) {
1542 unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
1543 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1544 }
1545
1546 if (radv_image_is_tc_compat_cmask(iview->image)) {
1547 /* Allow the texture block to read FMASK directly without decompressing it. */
1548 cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
1549
1550 if (device->physical_device->rad_info.gfx_level == GFX8) {
1551 /* Set CMASK into a tiling format that allows
1552 * the texture block to read it.
1553 */
1554 cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
1555 }
1556 }
1557 }
1558
1559 if (radv_image_has_cmask(iview->image) && !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
1560 cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
1561
1562 if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
1563 device->physical_device->rad_info.gfx_level < GFX11)
1564 cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
1565
1566 cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
1567
1568 /* This must be set for fast clear to work without FMASK. */
1569 if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
1570 unsigned bankh = util_logbase2(surf->u.legacy.bankh);
1571 cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1572 }
1573
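/* On GFX9+ the CB is programmed with the mip-0 dimensions (MIP0_WIDTH /
 * MIP0_HEIGHT) and the last mip level (MAX_MIP); the view selects its base
 * level through the MIP_LEVEL field set below.
 */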
1574 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1575 unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
1576 : (iview->image->vk.array_layers - 1);
1577 unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
1578 unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
1579 unsigned max_mip = iview->image->vk.mip_levels - 1;
1580
1581 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1582 unsigned base_level = iview->vk.base_mip_level;
1583
1584 if (iview->nbc_view.valid) {
1585 base_level = iview->nbc_view.level;
1586 max_mip = iview->nbc_view.num_levels - 1;
1587 }
1588
1589 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);
1590
1591 cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
1592 S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
1593 } else {
1594 cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
1595 cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1596 }
1597
1598 /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
1599 * of 256B. Only set it for 2D linear for multi-GPU interop.
1600 *
1601 * We set the pitch in MIP0_WIDTH.
1602 */
1603    if (device->physical_device->rad_info.gfx_level >= GFX10_3 && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
1604 iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
1605 assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
1606
1607 width = plane->surface.u.gfx9.surf_pitch;
1608
1609 /* Subsampled images have the pitch in the units of blocks. */
1610 if (plane->surface.blk_w == 2)
1611 width *= 2;
1612 }
1613
1614 cb->cb_color_attrib2 =
1615 S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) | S_028C68_MAX_MIP(max_mip);
1616 }
1617 }
1618
1619 static unsigned
1620 radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
1621 {
1622 unsigned max_zplanes = 0;
1623
1624 assert(radv_image_is_tc_compat_htile(iview->image));
1625
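/* The value returned here is written to DB_Z_INFO.DECOMPRESS_ON_N_ZPLANES by
 * radv_initialise_ds_surface(); it only applies to TC-compatible HTILE
 * surfaces (see the assert above).
 */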
1626 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1627 /* Default value for 32-bit depth surfaces. */
1628 max_zplanes = 4;
1629
1630 if (iview->vk.format == VK_FORMAT_D16_UNORM && iview->image->vk.samples > 1)
1631 max_zplanes = 2;
1632
1633 /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
1634 if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
1635 radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) &&
1636 iview->image->vk.samples == 4) {
1637 max_zplanes = 1;
1638 }
1639
1640 max_zplanes = max_zplanes + 1;
1641 } else {
1642 if (iview->vk.format == VK_FORMAT_D16_UNORM) {
1643    /* Do not enable Z plane compression for 16-bit depth
1644    * surfaces because it isn't supported on GFX8. Only
1645    * 32-bit depth surfaces are supported by the hardware.
1646    * This maintains shader compatibility and reduces the
1647    * number of depth decompressions.
1648    */
1649 max_zplanes = 1;
1650 } else {
1651 if (iview->image->vk.samples <= 1)
1652 max_zplanes = 5;
1653 else if (iview->image->vk.samples <= 4)
1654 max_zplanes = 3;
1655 else
1656 max_zplanes = 2;
1657 }
1658 }
1659
1660 return max_zplanes;
1661 }
1662
1663 void
1664 radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds)
1665 {
1666 const struct radeon_surf *surf = &image->planes[0].surface;
1667
1668 assert(image->vk.format == VK_FORMAT_D16_UNORM);
1669 memset(ds, 0, sizeof(*ds));
1670
1671 ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
1672 S_028038_ZRANGE_PRECISION(1) | S_028038_TILE_SURFACE_ENABLE(1);
1673 ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
1674
1675 ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) | S_02801C_Y_MAX(image->vk.extent.height - 1);
1676
1677 ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
1678 ds->db_htile_surface =
1679 S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) | S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
1680 }
1681
1682 void
1683 radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
1684 struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
1685 {
1686 unsigned level = iview->vk.base_mip_level;
1687 unsigned format, stencil_format;
1688 uint64_t va, s_offs, z_offs;
1689 bool stencil_only = iview->image->vk.format == VK_FORMAT_S8_UINT;
1690 const struct radv_image_plane *plane = &iview->image->planes[0];
1691 const struct radeon_surf *surf = &plane->surface;
1692
1693 assert(vk_format_get_plane_count(iview->image->vk.format) == 1);
1694
1695 memset(ds, 0, sizeof(*ds));
1696
1697 format = radv_translate_dbformat(iview->image->vk.format);
1698 stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
1699
1700 uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
1701 ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
1702 S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
1703 S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
1704 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1705 ds->db_depth_view |=
1706 S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
1707 }
1708
1709 ds->db_htile_data_base = 0;
1710 ds->db_htile_surface = 0;
1711
1712 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1713 s_offs = z_offs = va;
1714
1715 /* Recommended value for better performance with 4x and 8x. */
1716 ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) |
1717 S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->rad_info.gfx_level >= GFX10_3);
1718
1719 if (device->physical_device->rad_info.gfx_level >= GFX9) {
1720 assert(surf->u.gfx9.surf_offset == 0);
1721 s_offs += surf->u.gfx9.zs.stencil_offset;
1722
1723 ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
1724 S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
1725 S_028038_ZRANGE_PRECISION(1) |
1726 S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
1727 ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
1728 S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
1729
1730 if (device->physical_device->rad_info.gfx_level == GFX9) {
1731 ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
1732 ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
1733 }
1734
1735 ds->db_depth_view |= S_028008_MIPID(level);
1736 ds->db_depth_size =
1737 S_02801C_X_MAX(iview->image->vk.extent.width - 1) | S_02801C_Y_MAX(iview->image->vk.extent.height - 1);
1738
1739 if (radv_htile_enabled(iview->image, level)) {
1740 ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
1741
1742 if (radv_image_is_tc_compat_htile(iview->image)) {
1743 unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
1744
1745 ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1746
1747 if (device->physical_device->rad_info.gfx_level >= GFX10) {
1748 bool iterate256 = radv_image_get_iterate256(device, iview->image);
1749
1750 ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
1751 ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
1752 ds->db_z_info |= S_028040_ITERATE_256(iterate256);
1753 ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
1754 } else {
1755 ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
1756 ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
1757 }
1758 }
1759
1760 if (radv_image_tile_stencil_disabled(device, iview->image)) {
1761 ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
1762 }
1763
1764 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
1765 ds->db_htile_data_base = va >> 8;
1766 ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
1767
1768 if (device->physical_device->rad_info.gfx_level == GFX9) {
1769 ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
1770 }
1771
1772 if (radv_image_has_vrs_htile(device, iview->image)) {
1773 ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
1774 }
1775 }
1776
1777 if (device->physical_device->rad_info.gfx_level >= GFX11) {
1778 radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control);
1779 }
1780 } else {
1781 const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
1782
1783 if (stencil_only)
1784 level_info = &surf->u.legacy.zs.stencil_level[level];
1785
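/* Legacy level offsets are stored in 256-byte units; convert them to byte
 * offsets here and shift back down to 256B units when DB_Z_READ_BASE and
 * DB_STENCIL_READ_BASE are programmed at the end of this function.
 */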
1786 z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
1787 s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;
1788
1789 ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
1790 ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
1791 ds->db_stencil_info = S_028044_FORMAT(stencil_format);
1792
1793 if (iview->image->vk.samples > 1)
1794 ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));
1795
1796 if (device->physical_device->rad_info.gfx_level >= GFX7) {
1797 const struct radeon_info *info = &device->physical_device->rad_info;
1798 unsigned tiling_index = surf->u.legacy.tiling_index[level];
1799 unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
1800 unsigned macro_index = surf->u.legacy.macro_tile_index;
1801 unsigned tile_mode = info->si_tile_mode_array[tiling_index];
1802 unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
1803 unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
1804
1805 if (stencil_only)
1806 tile_mode = stencil_tile_mode;
1807
1808 ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
1809 S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
1810 S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
1811 S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
1812 S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
1813 S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
1814 ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
1815 ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
1816 } else {
1817 unsigned tile_mode_index = radv_tile_mode_index(&iview->image->planes[0], level, false);
1818 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1819 tile_mode_index = radv_tile_mode_index(&iview->image->planes[0], level, true);
1820 ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1821 if (stencil_only)
1822 ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1823 }
1824
1825 ds->db_depth_size =
1826 S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
1827 ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
1828
1829 if (radv_htile_enabled(iview->image, level)) {
1830 ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
1831
1832 if (radv_image_tile_stencil_disabled(device, iview->image)) {
1833 ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
1834 }
1835
1836 va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
1837 ds->db_htile_data_base = va >> 8;
1838 ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
1839
1840 if (radv_image_is_tc_compat_htile(iview->image)) {
1841 unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
1842
1843 ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
1844 ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1845 }
1846 }
1847 }
1848
1849 ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
1850 ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
1851 }
1852
1853 void
1854 radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
1855 {
1856 const struct radv_physical_device *pdevice = device->physical_device;
1857 unsigned max_allowed_tiles_in_wave = 0;
1858
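/* MAX_ALLOWED_TILES_IN_WAVE is only raised for 4x/8x MSAA and is tuned
 * differently for dedicated-VRAM GPUs and APUs; other sample counts leave
 * the field at 0.
 */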
1859 if (pdevice->rad_info.has_dedicated_vram) {
1860 if (num_samples == 8)
1861 max_allowed_tiles_in_wave = 6;
1862 else if (num_samples == 4)
1863 max_allowed_tiles_in_wave = 13;
1864 else
1865 max_allowed_tiles_in_wave = 0;
1866 } else {
1867 if (num_samples == 8)
1868 max_allowed_tiles_in_wave = 7;
1869 else if (num_samples == 4)
1870 max_allowed_tiles_in_wave = 15;
1871 else
1872 max_allowed_tiles_in_wave = 0;
1873 }
1874
1875 *db_render_control |= S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
1876 }
1877
1878 VKAPI_ATTR VkResult VKAPI_CALL
1879 radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
1880 {
1881 RADV_FROM_HANDLE(radv_device, device, _device);
1882 RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
1883
1884 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1885
1886 /* At the moment, we support only the below handle types. */
1887 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1888 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1889
1890 bool ret = radv_get_memory_fd(device, memory, pFD);
1891 if (ret == false)
1892 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1893 return VK_SUCCESS;
1894 }
1895
1896 static uint32_t
1897 radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev, enum radeon_bo_domain domains,
1898 enum radeon_bo_flag flags, enum radeon_bo_flag ignore_flags)
1899 {
1900 /* Don't count GTT/CPU as relevant:
1901 *
1902 * - We're not fully consistent between the two.
1903 * - Sometimes VRAM gets VRAM|GTT.
1904 */
1905 const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
1906 uint32_t bits = 0;
1907 for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
1908 if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
1909 continue;
1910
1911 if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
1912 continue;
1913
1914 bits |= 1u << i;
1915 }
1916
1917 return bits;
1918 }
1919
1920 static uint32_t
1921 radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
1922 enum radeon_bo_flag flags)
1923 {
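/* Only the NO_CPU_ACCESS and GTT_WC flags have to match at first (all other
 * flags are ignored); if that yields no memory types, progressively ignore
 * those two as well.
 */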
1924 enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
1925 uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1926
1927 if (!bits) {
1928 ignore_flags |= RADEON_FLAG_GTT_WC;
1929 bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1930 }
1931
1932 if (!bits) {
1933 ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
1934 bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1935 }
1936
1937 /* Avoid 32-bit memory types for shared memory. */
1938 bits &= ~dev->memory_types_32bit;
1939
1940 return bits;
1941 }
1942 VKAPI_ATTR VkResult VKAPI_CALL
1943 radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd,
1944 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
1945 {
1946 RADV_FROM_HANDLE(radv_device, device, _device);
1947
1948 switch (handleType) {
1949 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
1950 enum radeon_bo_domain domains;
1951 enum radeon_bo_flag flags;
1952 if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
1953 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1954
1955 pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
1956 return VK_SUCCESS;
1957 }
1958 default:
1959 /* The valid usage section for this function says:
1960 *
1961 * "handleType must not be one of the handle types defined as
1962 * opaque."
1963 *
1964 * So opaque handle types fall into the default "unsupported" case.
1965 */
1966 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1967 }
1968 }
1969
1970 VKAPI_ATTR VkResult VKAPI_CALL
1971 radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
1972 const VkCalibratedTimestampInfoKHR *pTimestampInfos, uint64_t *pTimestamps,
1973 uint64_t *pMaxDeviation)
1974 {
1975 #ifndef _WIN32
1976 RADV_FROM_HANDLE(radv_device, device, _device);
1977 uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
1978 int d;
1979 uint64_t begin, end;
1980 uint64_t max_clock_period = 0;
1981
1982 #ifdef CLOCK_MONOTONIC_RAW
1983 begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
1984 #else
1985 begin = vk_clock_gettime(CLOCK_MONOTONIC);
1986 #endif
1987
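/* Sample the host clock before and after reading every requested domain; the
 * reported maximum deviation is derived from that window plus the coarsest
 * clock period seen below.
 */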
1988 for (d = 0; d < timestampCount; d++) {
1989 switch (pTimestampInfos[d].timeDomain) {
1990 case VK_TIME_DOMAIN_DEVICE_KHR:
1991 pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
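         /* clock_crystal_freq is expressed in kHz, so DIV_ROUND_UP(1000000, freq)
          * gives the GPU timestamp period in nanoseconds, which should match the
          * timestampPeriod the driver reports.
          */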
1992 uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
1993 max_clock_period = MAX2(max_clock_period, device_period);
1994 break;
1995 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
1996 pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
1997 max_clock_period = MAX2(max_clock_period, 1);
1998 break;
1999
2000 #ifdef CLOCK_MONOTONIC_RAW
2001 case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
2002 pTimestamps[d] = begin;
2003 break;
2004 #endif
2005 default:
2006 pTimestamps[d] = 0;
2007 break;
2008 }
2009 }
2010
2011 #ifdef CLOCK_MONOTONIC_RAW
2012 end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
2013 #else
2014 end = vk_clock_gettime(CLOCK_MONOTONIC);
2015 #endif
2016
2017 *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
2018
2019 return VK_SUCCESS;
2020 #else
2021 return VK_ERROR_FEATURE_NOT_PRESENT;
2022 #endif
2023 }
2024
2025 bool
2026 radv_device_set_pstate(struct radv_device *device, bool enable)
2027 {
2028 struct radeon_winsys *ws = device->ws;
2029 enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
2030
2031 if (device->physical_device->rad_info.has_stable_pstate) {
2032 /* pstate is per-device; setting it for one ctx is sufficient.
2033 * We pick the first initialized one below. */
2034 for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)
2035 if (device->hw_ctx[i])
2036 return ws->ctx_set_pstate(device->hw_ctx[i], pstate) >= 0;
2037 }
2038
2039 return true;
2040 }
2041
2042 bool
2043 radv_device_acquire_performance_counters(struct radv_device *device)
2044 {
2045 bool result = true;
2046 simple_mtx_lock(&device->pstate_mtx);
2047
2048 if (device->pstate_cnt == 0) {
2049 result = radv_device_set_pstate(device, true);
2050 if (result)
2051 ++device->pstate_cnt;
2052 }
2053
2054 simple_mtx_unlock(&device->pstate_mtx);
2055 return result;
2056 }
2057
2058 void
2059 radv_device_release_performance_counters(struct radv_device *device)
2060 {
2061 simple_mtx_lock(&device->pstate_mtx);
2062
2063 if (--device->pstate_cnt == 0)
2064 radv_device_set_pstate(device, false);
2065
2066 simple_mtx_unlock(&device->pstate_mtx);
2067 }
2068
2069 VKAPI_ATTR VkResult VKAPI_CALL
2070 radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
2071 {
2072 RADV_FROM_HANDLE(radv_device, device, _device);
2073 bool result = radv_device_acquire_performance_counters(device);
2074 return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2075 }
2076
2077 VKAPI_ATTR void VKAPI_CALL
2078 radv_ReleaseProfilingLockKHR(VkDevice _device)
2079 {
2080 RADV_FROM_HANDLE(radv_device, device, _device);
2081 radv_device_release_performance_counters(device);
2082 }
2083
2084 VKAPI_ATTR void VKAPI_CALL
2085 radv_GetDeviceImageSubresourceLayoutKHR(VkDevice device, const VkDeviceImageSubresourceInfoKHR *pInfo,
2086 VkSubresourceLayout2KHR *pLayout)
2087 {
2088 UNUSED VkResult result;
2089 VkImage image;
2090
2091 result =
2092 radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
2093 assert(result == VK_SUCCESS);
2094
2095 radv_GetImageSubresourceLayout2KHR(device, image, pInfo->pSubresource, pLayout);
2096
2097 radv_DestroyImage(device, image, NULL);
2098 }
2099