/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#endif
#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#ifdef __linux__
#include <sys/inotify.h>
#endif

#include "meta/radv_meta.h"
#include "util/disk_cache.h"
#include "util/u_debug.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_common_entrypoints.h"
#include "vk_pipeline_cache.h"
#include "vk_semaphore.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "util/u_process.h"
#include "vulkan/vk_icd.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_common_entrypoints.h"
#include "vk_format.h"
#include "vk_sync.h"
#include "vk_sync_dummy.h"

#if LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

86 static bool
radv_spm_trace_enabled(struct radv_instance *instance)
88 {
89    return (instance->vk.trace_mode & RADV_TRACE_MODE_RGP) &&
90           debug_get_bool_option("RADV_THREAD_TRACE_CACHE_COUNTERS", true);
91 }
92 
93 bool
radv_device_fault_detection_enabled(const struct radv_device *device)
95 {
96    return device->instance->debug_flags & RADV_DEBUG_HANG;
97 }
98 
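/* Host-pointer imports are only exposed for cacheable GTT: report the first GTT memory type
 * without the write-combine flag as the single compatible memory type.
 */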
99 VKAPI_ATTR VkResult VKAPI_CALL
radv_GetMemoryHostPointerPropertiesEXT(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
                                       const void *pHostPointer,
                                       VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
103 {
104    RADV_FROM_HANDLE(radv_device, device, _device);
105 
106    switch (handleType) {
107    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
108       const struct radv_physical_device *physical_device = device->physical_device;
109       uint32_t memoryTypeBits = 0;
110       for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
111          if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
112              !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
113             memoryTypeBits = (1 << i);
114             break;
115          }
116       }
117       pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
118       return VK_SUCCESS;
119    }
120    default:
121       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
122    }
123 }
124 
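/* Allocate the buffer backing the custom border color palette (VK_EXT_custom_border_color):
 * a CPU-accessible, read-only VRAM allocation that is made resident and persistently mapped.
 */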
125 static VkResult
radv_device_init_border_color(struct radv_device *device)
127 {
128    VkResult result;
129 
130    result =
131       device->ws->buffer_create(device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
132                                 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
133                                 RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
134 
135    if (result != VK_SUCCESS)
136       return vk_error(device, result);
137 
138    radv_rmv_log_border_color_palette_create(device, device->border_color_data.bo);
139 
140    result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
141    if (result != VK_SUCCESS)
142       return vk_error(device, result);
143 
144    device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
145    if (!device->border_color_data.colors_gpu_ptr)
146       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
147    mtx_init(&device->border_color_data.mutex, mtx_plain);
148 
149    return VK_SUCCESS;
150 }
151 
152 static void
radv_device_finish_border_color(struct radv_device *device)
154 {
155    if (device->border_color_data.bo) {
156       radv_rmv_log_border_color_palette_destroy(device, device->border_color_data.bo);
157       device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
158       device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
159 
160       mtx_destroy(&device->border_color_data.mutex);
161    }
162 }
163 
164 static struct radv_shader_part *
_radv_create_vs_prolog(struct radv_device *device, const void *_key)
166 {
167    struct radv_vs_prolog_key *key = (struct radv_vs_prolog_key *)_key;
168    return radv_create_vs_prolog(device, key);
169 }
170 
171 static uint32_t
radv_hash_vs_prolog(const void *key_)
173 {
174    const struct radv_vs_prolog_key *key = key_;
175    return _mesa_hash_data(key, sizeof(*key));
176 }
177 
178 static bool
radv_cmp_vs_prolog(const void *a_, const void *b_)
180 {
181    const struct radv_vs_prolog_key *a = a_;
182    const struct radv_vs_prolog_key *b = b_;
183 
184    return memcmp(a, b, sizeof(*a)) == 0;
185 }
186 
187 static struct radv_shader_part_cache_ops vs_prolog_ops = {
188    .create = _radv_create_vs_prolog,
189    .hash = radv_hash_vs_prolog,
190    .equals = radv_cmp_vs_prolog,
191 };
192 
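/* Pre-compile the vertex shader prologs that are easy to predict: one "simple" prolog per
 * attribute count, plus one prolog for every contiguous range of instance-rate inputs, stored
 * at the index returned by radv_instance_rate_prolog_index().
 */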
193 static VkResult
radv_device_init_vs_prologs(struct radv_device *device)
195 {
196    if (!radv_shader_part_cache_init(&device->vs_prologs, &vs_prolog_ops))
197       return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
198 
199    /* don't pre-compile prologs if we want to print them */
200    if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
201       return VK_SUCCESS;
202 
203    struct radv_vs_prolog_key key;
204    memset(&key, 0, sizeof(key));
205    key.as_ls = false;
206    key.is_ngg = device->physical_device->use_ngg;
207    key.next_stage = MESA_SHADER_VERTEX;
208    key.wave32 = device->physical_device->ge_wave_size == 32;
209 
210    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
211       key.instance_rate_inputs = 0;
212       key.num_attributes = i;
213 
214       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
215       if (!device->simple_vs_prologs[i - 1])
216          return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
217    }
218 
219    unsigned idx = 0;
220    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
221       for (unsigned count = 1; count <= num_attributes; count++) {
222          for (unsigned start = 0; start <= (num_attributes - count); start++) {
223             key.instance_rate_inputs = u_bit_consecutive(start, count);
224             key.num_attributes = num_attributes;
225 
226             struct radv_shader_part *prolog = radv_create_vs_prolog(device, &key);
227             if (!prolog)
228                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
229 
230             assert(idx == radv_instance_rate_prolog_index(num_attributes, key.instance_rate_inputs));
231             device->instance_rate_vs_prologs[idx++] = prolog;
232          }
233       }
234    }
235    assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
236 
237    return VK_SUCCESS;
238 }
239 
240 static void
radv_device_finish_vs_prologs(struct radv_device *device)
242 {
243    if (device->vs_prologs.ops)
244       radv_shader_part_cache_finish(device, &device->vs_prologs);
245 
246    for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++) {
247       if (!device->simple_vs_prologs[i])
248          continue;
249 
250       radv_shader_part_unref(device, device->simple_vs_prologs[i]);
251    }
252 
253    for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++) {
254       if (!device->instance_rate_vs_prologs[i])
255          continue;
256 
257       radv_shader_part_unref(device, device->instance_rate_vs_prologs[i]);
258    }
259 }
260 
261 static struct radv_shader_part *
_radv_create_ps_epilog(struct radv_device *device, const void *_key)
263 {
264    struct radv_ps_epilog_key *key = (struct radv_ps_epilog_key *)_key;
265    return radv_create_ps_epilog(device, key, NULL);
266 }
267 
268 static uint32_t
radv_hash_ps_epilog(const void *key_)
270 {
271    const struct radv_ps_epilog_key *key = key_;
272    return _mesa_hash_data(key, sizeof(*key));
273 }
274 
275 static bool
radv_cmp_ps_epilog(const void *a_, const void *b_)
277 {
278    const struct radv_ps_epilog_key *a = a_;
279    const struct radv_ps_epilog_key *b = b_;
280 
281    return memcmp(a, b, sizeof(*a)) == 0;
282 }
283 
284 static struct radv_shader_part_cache_ops ps_epilog_ops = {
285    .create = _radv_create_ps_epilog,
286    .hash = radv_hash_ps_epilog,
287    .equals = radv_cmp_ps_epilog,
288 };
289 
290 static struct radv_shader_part *
_radv_create_tcs_epilog(struct radv_device *device, const void *_key)
292 {
293    struct radv_tcs_epilog_key *key = (struct radv_tcs_epilog_key *)_key;
294    return radv_create_tcs_epilog(device, key);
295 }
296 
297 static uint32_t
radv_hash_tcs_epilog(const void *key_)
299 {
300    const struct radv_tcs_epilog_key *key = key_;
301    return _mesa_hash_data(key, sizeof(*key));
302 }
303 
304 static bool
radv_cmp_tcs_epilog(const void *a_, const void *b_)
306 {
307    const struct radv_tcs_epilog_key *a = a_;
308    const struct radv_tcs_epilog_key *b = b_;
309 
310    return memcmp(a, b, sizeof(*a)) == 0;
311 }
312 
313 static struct radv_shader_part_cache_ops tcs_epilog_ops = {
314    .create = _radv_create_tcs_epilog,
315    .hash = radv_hash_tcs_epilog,
316    .equals = radv_cmp_tcs_epilog,
317 };
318 
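/* Create the internal VRS state: a max-framebuffer-sized D16 depth image plus a storage buffer
 * bound to its own allocation and sized to the image's metadata (surface.meta_size), used by
 * the forced-VRS paths enabled via RADV_FORCE_VRS / RADV_FORCE_VRS_CONFIG_FILE.
 */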
319 VkResult
radv_device_init_vrs_state(struct radv_device *device)
321 {
322    VkDeviceMemory mem;
323    VkBuffer buffer;
324    VkResult result;
325    VkImage image;
326 
327    VkImageCreateInfo image_create_info = {
328       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
329       .imageType = VK_IMAGE_TYPE_2D,
330       .format = VK_FORMAT_D16_UNORM,
331       .extent = {MAX_FRAMEBUFFER_WIDTH, MAX_FRAMEBUFFER_HEIGHT, 1},
332       .mipLevels = 1,
333       .arrayLayers = 1,
334       .samples = VK_SAMPLE_COUNT_1_BIT,
335       .tiling = VK_IMAGE_TILING_OPTIMAL,
336       .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
337       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
338       .queueFamilyIndexCount = 0,
339       .pQueueFamilyIndices = NULL,
340       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
341    };
342 
343    result =
344       radv_image_create(radv_device_to_handle(device), &(struct radv_image_create_info){.vk_info = &image_create_info},
345                         &device->meta_state.alloc, &image, true);
346    if (result != VK_SUCCESS)
347       return result;
348 
349    VkBufferCreateInfo buffer_create_info = {
350       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
351       .pNext =
352          &(VkBufferUsageFlags2CreateInfoKHR){
353             .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
354             .usage = VK_BUFFER_USAGE_2_STORAGE_BUFFER_BIT_KHR,
355          },
356       .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
357       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
358    };
359 
360    result = radv_create_buffer(device, &buffer_create_info, &device->meta_state.alloc, &buffer, true);
361    if (result != VK_SUCCESS)
362       goto fail_create;
363 
364    VkBufferMemoryRequirementsInfo2 info = {
365       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
366       .buffer = buffer,
367    };
368    VkMemoryRequirements2 mem_req = {
369       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
370    };
371    vk_common_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
372 
373    VkMemoryAllocateInfo alloc_info = {
374       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
375       .allocationSize = mem_req.memoryRequirements.size,
376    };
377 
378    result = radv_alloc_memory(device, &alloc_info, &device->meta_state.alloc, &mem, true);
379    if (result != VK_SUCCESS)
380       goto fail_alloc;
381 
382    VkBindBufferMemoryInfo bind_info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
383                                        .buffer = buffer,
384                                        .memory = mem,
385                                        .memoryOffset = 0};
386 
387    result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
388    if (result != VK_SUCCESS)
389       goto fail_bind;
390 
391    device->vrs.image = radv_image_from_handle(image);
392    device->vrs.buffer = radv_buffer_from_handle(buffer);
393    device->vrs.mem = radv_device_memory_from_handle(mem);
394 
395    return VK_SUCCESS;
396 
397 fail_bind:
398    radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
399 fail_alloc:
400    radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
401 fail_create:
402    radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
403 
404    return result;
405 }
406 
407 static void
radv_device_finish_vrs_image(struct radv_device *device)
409 {
410    if (!device->vrs.image)
411       return;
412 
413    radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
414                    &device->meta_state.alloc);
415    radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
416                       &device->meta_state.alloc);
417    radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image), &device->meta_state.alloc);
418 }
419 
420 static enum radv_force_vrs
radv_parse_vrs_rates(const char *str)
422 {
423    if (!strcmp(str, "2x2")) {
424       return RADV_FORCE_VRS_2x2;
425    } else if (!strcmp(str, "2x1")) {
426       return RADV_FORCE_VRS_2x1;
427    } else if (!strcmp(str, "1x2")) {
428       return RADV_FORCE_VRS_1x2;
429    } else if (!strcmp(str, "1x1")) {
430       return RADV_FORCE_VRS_1x1;
431    }
432 
433    fprintf(stderr, "radv: Invalid VRS rates specified (valid values are 2x2, 2x1, 1x2 and 1x1)\n");
434    return RADV_FORCE_VRS_1x1;
435 }
436 
437 static const char *
radv_get_force_vrs_config_file(void)
439 {
440    return getenv("RADV_FORCE_VRS_CONFIG_FILE");
441 }
442 
443 static enum radv_force_vrs
radv_parse_force_vrs_config_file(const char *config_file)
445 {
446    enum radv_force_vrs force_vrs = RADV_FORCE_VRS_1x1;
447    char buf[4];
448    FILE *f;
449 
450    f = fopen(config_file, "r");
451    if (!f) {
452       fprintf(stderr, "radv: Can't open file: '%s'.\n", config_file);
453       return force_vrs;
454    }
455 
456    if (fread(buf, sizeof(buf), 1, f) == 1) {
457       buf[3] = '\0';
458       force_vrs = radv_parse_vrs_rates(buf);
459    }
460 
461    fclose(f);
462    return force_vrs;
463 }
464 
465 #ifdef __linux__
466 
467 #define BUF_LEN ((10 * (sizeof(struct inotify_event) + NAME_MAX + 1)))
468 
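/* Worker thread for RADV_FORCE_VRS_CONFIG_FILE: polls the inotify fd, re-parses the config file
 * on IN_MODIFY/IN_DELETE_SELF, and re-arms the watch when the file was replaced (editors often
 * write a temporary file and delete the original).
 */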
469 static int
radv_notifier_thread_run(void *data)
471 {
472    struct radv_device *device = data;
473    struct radv_notifier *notifier = &device->notifier;
474    char buf[BUF_LEN];
475 
476    while (!notifier->quit) {
477       const char *file = radv_get_force_vrs_config_file();
      struct timespec tm = {.tv_nsec = 100000000}; /* 100ms */
479       int length, i = 0;
480 
481       length = read(notifier->fd, buf, BUF_LEN);
482       while (i < length) {
483          struct inotify_event *event = (struct inotify_event *)&buf[i];
484 
485          i += sizeof(struct inotify_event) + event->len;
486          if (event->mask & IN_MODIFY || event->mask & IN_DELETE_SELF) {
487             /* Sleep 100ms for editors that use a temporary file and delete the original. */
488             thrd_sleep(&tm, NULL);
489             device->force_vrs = radv_parse_force_vrs_config_file(file);
490 
491             fprintf(stderr, "radv: Updated the per-vertex VRS rate to '%d'.\n", device->force_vrs);
492 
493             if (event->mask & IN_DELETE_SELF) {
494                inotify_rm_watch(notifier->fd, notifier->watch);
495                notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
496             }
497          }
498       }
499 
500       thrd_sleep(&tm, NULL);
501    }
502 
503    return 0;
504 }
505 
506 #endif
507 
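/* Set up the inotify watch and thread that reload RADV_FORCE_VRS_CONFIG_FILE at runtime.
 * On non-Linux platforms this is a no-op that reports success.
 */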
508 static int
radv_device_init_notifier(struct radv_device *device)
510 {
511 #ifndef __linux__
512    return true;
513 #else
514    struct radv_notifier *notifier = &device->notifier;
515    const char *file = radv_get_force_vrs_config_file();
516    int ret;
517 
518    notifier->fd = inotify_init1(IN_NONBLOCK);
519    if (notifier->fd < 0)
520       return false;
521 
522    notifier->watch = inotify_add_watch(notifier->fd, file, IN_MODIFY | IN_DELETE_SELF);
523    if (notifier->watch < 0)
524       goto fail_watch;
525 
526    ret = thrd_create(&notifier->thread, radv_notifier_thread_run, device);
527    if (ret)
528       goto fail_thread;
529 
530    return true;
531 
532 fail_thread:
533    inotify_rm_watch(notifier->fd, notifier->watch);
534 fail_watch:
535    close(notifier->fd);
536 
537    return false;
538 #endif
539 }
540 
541 static void
radv_device_finish_notifier(struct radv_device *device)
543 {
544 #ifdef __linux__
545    struct radv_notifier *notifier = &device->notifier;
546 
547    if (!notifier->thread)
548       return;
549 
550    notifier->quit = true;
551    thrd_join(notifier->thread, NULL);
552    inotify_rm_watch(notifier->fd, notifier->watch);
553    close(notifier->fd);
554 #endif
555 }
556 
557 static void
radv_device_finish_perf_counter_lock_cs(struct radv_device *device)
559 {
560    if (!device->perf_counter_lock_cs)
561       return;
562 
563    for (unsigned i = 0; i < 2 * PERF_CTR_MAX_PASSES; ++i) {
564       if (device->perf_counter_lock_cs[i])
565          device->ws->cs_destroy(device->perf_counter_lock_cs[i]);
566    }
567 
568    free(device->perf_counter_lock_cs);
569 }
570 
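/* Helper for building the layered dispatch tables. Entrypoints are added outermost layer first
 * (app workarounds, then RGP/RRA/RMV/ctx-roll, then the base RADV/WSI/common tables); after a
 * table has been initialized, later additions only fill entrypoints that are still unset, so
 * layers added earlier take precedence and later ones act as the fallthrough.
 */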
571 struct dispatch_table_builder {
572    struct vk_device_dispatch_table *tables[RADV_DISPATCH_TABLE_COUNT];
573    bool used[RADV_DISPATCH_TABLE_COUNT];
574    bool initialized[RADV_DISPATCH_TABLE_COUNT];
575 };
576 
577 static void
add_entrypoints(struct dispatch_table_builder *b, const struct vk_device_entrypoint_table *entrypoints,
                enum radv_dispatch_table table)
580 {
581    for (int32_t i = table - 1; i >= RADV_DEVICE_DISPATCH_TABLE; i--) {
582       if (i == RADV_DEVICE_DISPATCH_TABLE || b->used[i]) {
583          vk_device_dispatch_table_from_entrypoints(b->tables[i], entrypoints, !b->initialized[i]);
584          b->initialized[i] = true;
585       }
586    }
587 
588    if (table < RADV_DISPATCH_TABLE_COUNT)
589       b->used[table] = true;
590 }
591 
592 static void
init_dispatch_tables(struct radv_device *device, struct radv_physical_device *physical_device)
594 {
595    struct dispatch_table_builder b = {0};
596    b.tables[RADV_DEVICE_DISPATCH_TABLE] = &device->vk.dispatch_table;
597    b.tables[RADV_APP_DISPATCH_TABLE] = &device->layer_dispatch.app;
598    b.tables[RADV_RGP_DISPATCH_TABLE] = &device->layer_dispatch.rgp;
599    b.tables[RADV_RRA_DISPATCH_TABLE] = &device->layer_dispatch.rra;
600    b.tables[RADV_RMV_DISPATCH_TABLE] = &device->layer_dispatch.rmv;
601    b.tables[RADV_CTX_ROLL_DISPATCH_TABLE] = &device->layer_dispatch.ctx_roll;
602 
603    if (!strcmp(physical_device->instance->drirc.app_layer, "metroexodus")) {
604       add_entrypoints(&b, &metro_exodus_device_entrypoints, RADV_APP_DISPATCH_TABLE);
605    } else if (!strcmp(physical_device->instance->drirc.app_layer, "rage2")) {
606       add_entrypoints(&b, &rage2_device_entrypoints, RADV_APP_DISPATCH_TABLE);
607    } else if (!strcmp(physical_device->instance->drirc.app_layer, "quanticdream")) {
608       add_entrypoints(&b, &quantic_dream_device_entrypoints, RADV_APP_DISPATCH_TABLE);
609    }
610 
611    if (physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
612       add_entrypoints(&b, &sqtt_device_entrypoints, RADV_RGP_DISPATCH_TABLE);
613 
614    if ((physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(physical_device, false))
615       add_entrypoints(&b, &rra_device_entrypoints, RADV_RRA_DISPATCH_TABLE);
616 
617 #ifndef _WIN32
618    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
619       add_entrypoints(&b, &rmv_device_entrypoints, RADV_RMV_DISPATCH_TABLE);
620 #endif
621 
622    if (physical_device->instance->vk.trace_mode & RADV_TRACE_MODE_CTX_ROLLS)
623       add_entrypoints(&b, &ctx_roll_device_entrypoints, RADV_CTX_ROLL_DISPATCH_TABLE);
624 
625    add_entrypoints(&b, &radv_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
626    add_entrypoints(&b, &wsi_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
627    add_entrypoints(&b, &vk_common_device_entrypoints, RADV_DISPATCH_TABLE_COUNT);
628 }
629 
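/* vk_device::capture_trace hook, invoked by the common runtime when a trace capture is
 * requested. Marks RRA and SQTT captures as triggered, dumps the RMV capture immediately, and
 * opens a /tmp/*.ctxroll file for context-roll logging.
 */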
630 static VkResult
capture_trace(VkQueue _queue)
632 {
633    RADV_FROM_HANDLE(radv_queue, queue, _queue);
634 
635    VkResult result = VK_SUCCESS;
636 
637    if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA)
638       queue->device->rra_trace.triggered = true;
639 
640    if (queue->device->vk.memory_trace_data.is_enabled) {
641       simple_mtx_lock(&queue->device->vk.memory_trace_data.token_mtx);
642       radv_rmv_collect_trace_events(queue->device);
643       vk_dump_rmv_capture(&queue->device->vk.memory_trace_data);
644       simple_mtx_unlock(&queue->device->vk.memory_trace_data.token_mtx);
645    }
646 
647    if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP)
648       queue->device->sqtt_triggered = true;
649 
650    if (queue->device->instance->vk.trace_mode & RADV_TRACE_MODE_CTX_ROLLS) {
651       char filename[2048];
652       time_t t = time(NULL);
653       struct tm now = *localtime(&t);
654       snprintf(filename, sizeof(filename), "/tmp/%s_%04d.%02d.%02d_%02d.%02d.%02d.ctxroll", util_get_process_name(),
655                1900 + now.tm_year, now.tm_mon + 1, now.tm_mday, now.tm_hour, now.tm_min, now.tm_sec);
656 
657       simple_mtx_lock(&queue->device->ctx_roll_mtx);
658 
659       queue->device->ctx_roll_file = fopen(filename, "w");
660       if (queue->device->ctx_roll_file)
661          fprintf(stderr, "radv: Writing context rolls to '%s'...\n", filename);
662 
663       simple_mtx_unlock(&queue->device->ctx_roll_mtx);
664    }
665 
666    return result;
667 }
668 
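/* Build the per-device shader cache key from the enabled features that affect code generation
 * and hash it into device->cache_hash. With shader objects enabled, features that would change
 * the generated code are assumed enabled so binaries stay compatible across logical devices.
 */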
669 static void
radv_device_init_cache_key(struct radv_device *device)
671 {
672    struct radv_device_cache_key *key = &device->cache_key;
673 
674    key->disable_trunc_coord = device->disable_trunc_coord;
675    key->image_2d_view_of_3d =
676       device->vk.enabled_features.image2DViewOf3D && device->physical_device->rad_info.gfx_level == GFX9;
677    key->mesh_shader_queries = device->vk.enabled_features.meshShaderQueries;
678    key->primitives_generated_query = radv_uses_primitives_generated_query(device);
679 
680    /* The Vulkan spec says:
681     *  "Binary shaders retrieved from a physical device with a certain shaderBinaryUUID are
682     *   guaranteed to be compatible with all other physical devices reporting the same
683     *   shaderBinaryUUID and the same or higher shaderBinaryVersion."
684     *
685     * That means the driver should compile shaders for the "worst" case of all features being
686     * enabled, regardless of what features are actually enabled on the logical device.
687     */
688    if (device->vk.enabled_features.shaderObject) {
689       key->image_2d_view_of_3d = device->physical_device->rad_info.gfx_level == GFX9;
690       key->primitives_generated_query = true;
691    }
692 
693    _mesa_blake3_compute(key, sizeof(*key), device->cache_hash);
694 }
695 
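/* Logical device creation: allocates the radv_device, sets up the dispatch tables, one hardware
 * context per requested queue priority, the queues themselves, the shader upload queue, meta
 * shaders, and the various optional debug/trace facilities (RADV_DEBUG=hang, SQTT/SPM, RMV, RRA,
 * trap handler, forced VRS).
 */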
696 VKAPI_ATTR VkResult VKAPI_CALL
radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
699 {
700    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
701    VkResult result;
702    struct radv_device *device;
703 
704    bool keep_shader_info = false;
705    bool overallocation_disallowed = false;
706 
707    vk_foreach_struct_const (ext, pCreateInfo->pNext) {
708       switch (ext->sType) {
709       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
710          const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
711          if (overallocation->overallocationBehavior == VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
712             overallocation_disallowed = true;
713          break;
714       }
715       default:
716          break;
717       }
718    }
719 
720    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
721                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
722    if (!device)
723       return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
724 
725    result = vk_device_init(&device->vk, &physical_device->vk, NULL, pCreateInfo, pAllocator);
726    if (result != VK_SUCCESS) {
727       vk_free(&device->vk.alloc, device);
728       return result;
729    }
730 
731    init_dispatch_tables(device, physical_device);
732 
733    device->vk.capture_trace = capture_trace;
734 
735    device->vk.command_buffer_ops = &radv_cmd_buffer_ops;
736 
737    device->instance = physical_device->instance;
738    device->physical_device = physical_device;
739    simple_mtx_init(&device->ctx_roll_mtx, mtx_plain);
740    simple_mtx_init(&device->trace_mtx, mtx_plain);
741    simple_mtx_init(&device->pstate_mtx, mtx_plain);
742    simple_mtx_init(&device->rt_handles_mtx, mtx_plain);
743 
744    device->rt_handles = _mesa_hash_table_create(NULL, _mesa_hash_u32, _mesa_key_u32_equal);
745 
746    device->ws = physical_device->ws;
747    vk_device_set_drm_fd(&device->vk, device->ws->get_fd(device->ws));
748 
749    /* With update after bind we can't attach bo's to the command buffer
750     * from the descriptor set anymore, so we have to use a global BO list.
751     */
752    device->use_global_bo_list =
753       (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) || device->vk.enabled_features.bufferDeviceAddress ||
754       device->vk.enabled_features.descriptorIndexing || device->vk.enabled_extensions.EXT_descriptor_indexing ||
755       device->vk.enabled_extensions.EXT_buffer_device_address ||
756       device->vk.enabled_extensions.KHR_buffer_device_address ||
757       device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
758       device->vk.enabled_extensions.KHR_acceleration_structure ||
759       device->vk.enabled_extensions.VALVE_descriptor_set_host_mapping;
760 
761    device->buffer_robustness = device->vk.enabled_features.robustBufferAccess2  ? RADV_BUFFER_ROBUSTNESS_2
762                                : device->vk.enabled_features.robustBufferAccess ? RADV_BUFFER_ROBUSTNESS_1
763                                                                                 : RADV_BUFFER_ROBUSTNESS_DISABLED;
764 
765    radv_init_shader_arenas(device);
766 
767    device->overallocation_disallowed = overallocation_disallowed;
768    mtx_init(&device->overallocation_mutex, mtx_plain);
769 
770    if (physical_device->rad_info.register_shadowing_required || device->instance->debug_flags & RADV_DEBUG_SHADOW_REGS)
771       device->uses_shadow_regs = true;
772 
773    /* Create one context per queue priority. */
774    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
775       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
776       const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority =
777          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
778       enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
779 
780       if (device->hw_ctx[priority])
781          continue;
782 
783       result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
784       if (result != VK_SUCCESS)
785          goto fail_queue;
786    }
787 
788    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
789       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
790       uint32_t qfi = queue_create->queueFamilyIndex;
791       const VkDeviceQueueGlobalPriorityCreateInfoKHR *global_priority =
792          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
793 
794       device->queues[qfi] = vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
795                                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
796       if (!device->queues[qfi]) {
797          result = VK_ERROR_OUT_OF_HOST_MEMORY;
798          goto fail_queue;
799       }
800 
801       memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
802 
803       device->queue_count[qfi] = queue_create->queueCount;
804 
805       for (unsigned q = 0; q < queue_create->queueCount; q++) {
806          result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
807          if (result != VK_SUCCESS)
808             goto fail_queue;
809       }
810    }
811    device->private_sdma_queue = VK_NULL_HANDLE;
812 
813    device->shader_use_invisible_vram = (device->instance->perftest_flags & RADV_PERFTEST_DMA_SHADERS) &&
814                                        /* SDMA buffer copy is only implemented for GFX7+. */
815                                        device->physical_device->rad_info.gfx_level >= GFX7;
816    result = radv_init_shader_upload_queue(device);
817    if (result != VK_SUCCESS)
818       goto fail;
819 
820    device->pbb_allowed =
821       device->physical_device->rad_info.gfx_level >= GFX9 && !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
822 
823    device->disable_trunc_coord = device->instance->drirc.disable_trunc_coord;
824 
825    if (device->instance->vk.app_info.engine_name && !strcmp(device->instance->vk.app_info.engine_name, "DXVK")) {
826       /* For DXVK 2.3.0 and older, use dualSrcBlend to determine if this is D3D9. */
827       bool is_d3d9 = !device->vk.enabled_features.dualSrcBlend;
828       if (device->instance->vk.app_info.engine_version > VK_MAKE_VERSION(2, 3, 0))
829          is_d3d9 = device->instance->vk.app_info.app_version & 0x1;
830 
831       device->disable_trunc_coord &= !is_d3d9;
832    }
833 
834    /* The maximum number of scratch waves. Scratch space isn't divided
835     * evenly between CUs. The number is only a function of the number of CUs.
836     * We can decrease the constant to decrease the scratch buffer size.
837     *
838     * sctx->scratch_waves must be >= the maximum possible size of
839     * 1 threadgroup, so that the hw doesn't hang from being unable
840     * to start any.
841     *
842     * The recommended value is 4 per CU at most. Higher numbers don't
843     * bring much benefit, but they still occupy chip resources (think
844     * async compute). I've seen ~2% performance difference between 4 and 32.
845     */
846    uint32_t max_threads_per_block = 2048;
847    device->scratch_waves = MAX2(32 * physical_device->rad_info.num_cu, max_threads_per_block / 64);
848 
849    device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
850 
851    if (device->physical_device->rad_info.gfx_level >= GFX7) {
852       /* If the KMD allows it (there is a KMD hw register for it),
853        * allow launching waves out-of-order.
854        */
855       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
856    }
857    if (device->physical_device->rad_info.gfx_level >= GFX10) {
858       /* Enable asynchronous compute tunneling. The KMD restricts this feature
859        * to high-priority compute queues, so setting the bit on any other queue
860        * is a no-op. PAL always sets this bit as well.
861        */
862       device->dispatch_initiator |= S_00B800_TUNNEL_ENABLE(1);
863    }
864 
865    /* Disable partial preemption for task shaders.
866     * The kernel may not support preemption, but PAL always sets this bit,
867     * so let's also set it here for consistency.
868     */
869    device->dispatch_initiator_task = device->dispatch_initiator | S_00B800_DISABLE_DISP_PREMPT_EN(1);
870 
871    if (radv_device_fault_detection_enabled(device)) {
872       /* Enable GPU hangs detection and dump logs if a GPU hang is
873        * detected.
874        */
875       keep_shader_info = true;
876 
877       if (!radv_init_trace(device)) {
878          result = VK_ERROR_INITIALIZATION_FAILED;
879          goto fail;
880       }
881 
882       fprintf(stderr, "*****************************************************************************\n");
883       fprintf(stderr, "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
884       fprintf(stderr, "*****************************************************************************\n");
885 
886       /* Wait for idle after every draw/dispatch to identify the
887        * first bad call.
888        */
889       device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
890 
891       radv_dump_enabled_options(device, stderr);
892    }
893 
894    if (device->instance->vk.trace_mode & RADV_TRACE_MODE_RGP) {
895       if (device->physical_device->rad_info.gfx_level < GFX8 || device->physical_device->rad_info.gfx_level > GFX11) {
896          fprintf(stderr, "GPU hardware not supported: refer to "
897                          "the RGP documentation for the list of "
898                          "supported GPUs!\n");
899          abort();
900       }
901 
902       if (!radv_sqtt_init(device)) {
903          result = VK_ERROR_INITIALIZATION_FAILED;
904          goto fail;
905       }
906 
907       fprintf(stderr,
908               "radv: Thread trace support is enabled (initial buffer size: %u MiB, "
909               "instruction timing: %s, cache counters: %s, queue events: %s).\n",
910               device->sqtt.buffer_size / (1024 * 1024), radv_is_instruction_timing_enabled() ? "enabled" : "disabled",
911               radv_spm_trace_enabled(device->instance) ? "enabled" : "disabled",
912               radv_sqtt_queue_events_enabled() ? "enabled" : "disabled");
913 
914       if (radv_spm_trace_enabled(device->instance)) {
915          if (device->physical_device->rad_info.gfx_level >= GFX10) {
916             if (!radv_spm_init(device)) {
917                result = VK_ERROR_INITIALIZATION_FAILED;
918                goto fail;
919             }
920          } else {
921             fprintf(stderr, "radv: SPM isn't supported for this GPU (%s)!\n", device->physical_device->name);
922          }
923       }
924    }
925 
926 #ifndef _WIN32
927    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
928       struct vk_rmv_device_info info;
929       memset(&info, 0, sizeof(struct vk_rmv_device_info));
930       radv_rmv_fill_device_info(physical_device, &info);
931       vk_memory_trace_init(&device->vk, &info);
932       radv_memory_trace_init(device);
933    }
934 #endif
935 
936    if (getenv("RADV_TRAP_HANDLER")) {
937       /* TODO: Add support for more hardware. */
938       assert(device->physical_device->rad_info.gfx_level == GFX8);
939 
940       fprintf(stderr, "**********************************************************************\n");
941       fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
942       fprintf(stderr, "**********************************************************************\n");
943 
944       /* To get the disassembly of the faulty shaders, we have to
945        * keep some shader info around.
946        */
947       keep_shader_info = true;
948 
949       if (!radv_trap_handler_init(device)) {
950          result = VK_ERROR_INITIALIZATION_FAILED;
951          goto fail;
952       }
953    }
954 
955    if (device->physical_device->rad_info.gfx_level == GFX10_3) {
956       if (getenv("RADV_FORCE_VRS_CONFIG_FILE")) {
957          const char *file = radv_get_force_vrs_config_file();
958 
959          device->force_vrs = radv_parse_force_vrs_config_file(file);
960 
961          if (radv_device_init_notifier(device)) {
962             device->force_vrs_enabled = true;
963          } else {
964             fprintf(stderr, "radv: Failed to initialize the notifier for RADV_FORCE_VRS_CONFIG_FILE!\n");
965          }
966       } else if (getenv("RADV_FORCE_VRS")) {
967          const char *vrs_rates = getenv("RADV_FORCE_VRS");
968 
969          device->force_vrs = radv_parse_vrs_rates(vrs_rates);
970          device->force_vrs_enabled = device->force_vrs != RADV_FORCE_VRS_1x1;
971       }
972    }
973 
974    /* PKT3_LOAD_SH_REG_INDEX is supported on GFX8+, but it hangs with compute queues until GFX10.3. */
975    device->load_grid_size_from_user_sgpr = device->physical_device->rad_info.gfx_level >= GFX10_3;
976 
977    device->keep_shader_info = keep_shader_info;
978 
979    /* Initialize the per-device cache key before compiling meta shaders. */
980    radv_device_init_cache_key(device);
981 
982    result = radv_device_init_meta(device);
983    if (result != VK_SUCCESS)
984       goto fail;
985 
986    radv_device_init_msaa(device);
987 
988    /* If the border color extension is enabled, let's create the buffer we need. */
989    if (device->vk.enabled_features.customBorderColors) {
990       result = radv_device_init_border_color(device);
991       if (result != VK_SUCCESS)
992          goto fail;
993    }
994 
995    if (device->vk.enabled_features.vertexInputDynamicState || device->vk.enabled_features.graphicsPipelineLibrary ||
996        device->vk.enabled_features.shaderObject) {
997       result = radv_device_init_vs_prologs(device);
998       if (result != VK_SUCCESS)
999          goto fail;
1000    }
1001 
1002    if (device->vk.enabled_features.shaderObject) {
1003       if (!radv_shader_part_cache_init(&device->tcs_epilogs, &tcs_epilog_ops)) {
1004          result = VK_ERROR_OUT_OF_HOST_MEMORY;
1005          goto fail;
1006       }
1007    }
1008 
1009    if (device->vk.enabled_features.graphicsPipelineLibrary || device->vk.enabled_features.shaderObject ||
1010        device->vk.enabled_features.extendedDynamicState3ColorBlendEnable ||
1011        device->vk.enabled_features.extendedDynamicState3ColorWriteMask ||
1012        device->vk.enabled_features.extendedDynamicState3AlphaToCoverageEnable ||
1013        device->vk.enabled_features.extendedDynamicState3ColorBlendEquation) {
1014       if (!radv_shader_part_cache_init(&device->ps_epilogs, &ps_epilog_ops)) {
1015          result = VK_ERROR_OUT_OF_HOST_MEMORY;
1016          goto fail;
1017       }
1018    }
1019 
1020    if (!(device->instance->debug_flags & RADV_DEBUG_NO_IBS))
1021       radv_create_gfx_config(device);
1022 
1023    struct vk_pipeline_cache_create_info info = {.weak_ref = true};
1024    device->mem_cache = vk_pipeline_cache_create(&device->vk, &info, NULL);
1025    if (!device->mem_cache) {
1026       result = VK_ERROR_OUT_OF_HOST_MEMORY;
1027       goto fail_meta;
1028    }
1029 
1030    device->force_aniso = MIN2(16, (int)debug_get_num_option("RADV_TEX_ANISO", -1));
1031    if (device->force_aniso >= 0) {
1032       fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n", 1 << util_logbase2(device->force_aniso));
1033    }
1034 
1035    if (device->vk.enabled_features.performanceCounterQueryPools) {
1036       size_t bo_size = PERF_CTR_BO_PASS_OFFSET + sizeof(uint64_t) * PERF_CTR_MAX_PASSES;
1037       result = device->ws->buffer_create(device->ws, bo_size, 4096, RADEON_DOMAIN_GTT,
1038                                          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
1039                                          RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->perf_counter_bo);
1040       if (result != VK_SUCCESS)
1041          goto fail_cache;
1042 
1043       device->perf_counter_lock_cs = calloc(sizeof(struct radeon_winsys_cs *), 2 * PERF_CTR_MAX_PASSES);
1044       if (!device->perf_counter_lock_cs) {
1045          result = VK_ERROR_OUT_OF_HOST_MEMORY;
1046          goto fail_cache;
1047       }
1048 
1049       if (!device->physical_device->ac_perfcounters.blocks) {
1050          result = VK_ERROR_INITIALIZATION_FAILED;
1051          goto fail_cache;
1052       }
1053    }
1054 
1055    if ((device->instance->vk.trace_mode & RADV_TRACE_MODE_RRA) && radv_enable_rt(physical_device, false)) {
1056       result = radv_rra_trace_init(device);
1057       if (result != VK_SUCCESS)
1058          goto fail;
1059    }
1060 
1061    if (device->vk.enabled_features.rayTracingPipelineShaderGroupHandleCaptureReplay) {
1062       device->capture_replay_arena_vas = _mesa_hash_table_u64_create(NULL);
1063    }
1064 
1065    result = radv_printf_data_init(device);
1066    if (result != VK_SUCCESS)
1067       goto fail_cache;
1068 
1069    *pDevice = radv_device_to_handle(device);
1070    return VK_SUCCESS;
1071 
1072 fail_cache:
1073    vk_pipeline_cache_destroy(device->mem_cache, NULL);
1074 fail_meta:
1075    radv_device_finish_meta(device);
1076 fail:
1077    radv_printf_data_finish(device);
1078 
1079    radv_sqtt_finish(device);
1080 
1081    radv_rra_trace_finish(radv_device_to_handle(device), &device->rra_trace);
1082 
1083    radv_spm_finish(device);
1084 
1085    radv_trap_handler_finish(device);
1086    radv_finish_trace(device);
1087 
1088    radv_device_finish_perf_counter_lock_cs(device);
1089    if (device->perf_counter_bo)
1090       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
1091    if (device->gfx_init)
1092       device->ws->buffer_destroy(device->ws, device->gfx_init);
1093 
1094    radv_device_finish_notifier(device);
1095    radv_device_finish_vs_prologs(device);
1096    if (device->tcs_epilogs.ops)
1097       radv_shader_part_cache_finish(device, &device->tcs_epilogs);
1098    if (device->ps_epilogs.ops)
1099       radv_shader_part_cache_finish(device, &device->ps_epilogs);
1100    radv_device_finish_border_color(device);
1101 
1102    radv_destroy_shader_upload_queue(device);
1103 
1104 fail_queue:
1105    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1106       for (unsigned q = 0; q < device->queue_count[i]; q++)
1107          radv_queue_finish(&device->queues[i][q]);
1108       if (device->queue_count[i])
1109          vk_free(&device->vk.alloc, device->queues[i]);
1110    }
1111 
1112    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
1113       if (device->hw_ctx[i])
1114          device->ws->ctx_destroy(device->hw_ctx[i]);
1115    }
1116 
1117    radv_destroy_shader_arenas(device);
1118 
1119    _mesa_hash_table_destroy(device->rt_handles, NULL);
1120 
1121    simple_mtx_destroy(&device->ctx_roll_mtx);
1122    simple_mtx_destroy(&device->pstate_mtx);
1123    simple_mtx_destroy(&device->trace_mtx);
1124    simple_mtx_destroy(&device->rt_handles_mtx);
1125    mtx_destroy(&device->overallocation_mutex);
1126 
1127    vk_device_finish(&device->vk);
1128    vk_free(&device->vk.alloc, device);
1129    return result;
1130 }
1131 
1132 VKAPI_ATTR void VKAPI_CALL
radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
1134 {
1135    RADV_FROM_HANDLE(radv_device, device, _device);
1136 
1137    if (!device)
1138       return;
1139 
1140    if (device->capture_replay_arena_vas)
1141       _mesa_hash_table_u64_destroy(device->capture_replay_arena_vas);
1142 
1143    radv_device_finish_perf_counter_lock_cs(device);
1144    if (device->perf_counter_bo)
1145       device->ws->buffer_destroy(device->ws, device->perf_counter_bo);
1146 
1147    if (device->gfx_init)
1148       device->ws->buffer_destroy(device->ws, device->gfx_init);
1149 
1150    radv_device_finish_notifier(device);
1151    radv_device_finish_vs_prologs(device);
1152    if (device->tcs_epilogs.ops)
1153       radv_shader_part_cache_finish(device, &device->tcs_epilogs);
1154    if (device->ps_epilogs.ops)
1155       radv_shader_part_cache_finish(device, &device->ps_epilogs);
1156    radv_device_finish_border_color(device);
1157    radv_device_finish_vrs_image(device);
1158 
1159    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
1160       for (unsigned q = 0; q < device->queue_count[i]; q++)
1161          radv_queue_finish(&device->queues[i][q]);
1162       if (device->queue_count[i])
1163          vk_free(&device->vk.alloc, device->queues[i]);
1164    }
1165    if (device->private_sdma_queue != VK_NULL_HANDLE) {
1166       radv_queue_finish(device->private_sdma_queue);
1167       vk_free(&device->vk.alloc, device->private_sdma_queue);
1168    }
1169 
1170    _mesa_hash_table_destroy(device->rt_handles, NULL);
1171 
1172    radv_device_finish_meta(device);
1173 
1174    vk_pipeline_cache_destroy(device->mem_cache, NULL);
1175 
1176    radv_destroy_shader_upload_queue(device);
1177 
1178    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
1179       if (device->hw_ctx[i])
1180          device->ws->ctx_destroy(device->hw_ctx[i]);
1181    }
1182 
1183    mtx_destroy(&device->overallocation_mutex);
1184    simple_mtx_destroy(&device->ctx_roll_mtx);
1185    simple_mtx_destroy(&device->pstate_mtx);
1186    simple_mtx_destroy(&device->trace_mtx);
1187    simple_mtx_destroy(&device->rt_handles_mtx);
1188 
1189    radv_trap_handler_finish(device);
1190    radv_finish_trace(device);
1191 
1192    radv_destroy_shader_arenas(device);
1193 
1194    radv_printf_data_finish(device);
1195 
1196    radv_sqtt_finish(device);
1197 
1198    radv_rra_trace_finish(_device, &device->rra_trace);
1199 
1200    radv_memory_trace_finish(device);
1201 
1202    radv_spm_finish(device);
1203 
1204    ralloc_free(device->gpu_hang_report);
1205 
1206    vk_device_finish(&device->vk);
1207    vk_free(&device->vk.alloc, device);
1208 }
1209 
1210 bool
radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
1212 {
1213    /* Set BO metadata for dedicated image allocations.  We don't need it for import when the image
1214     * tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, but we set it anyway for foreign consumers.
1215     */
1216    if (memory->image) {
1217       struct radeon_bo_metadata metadata;
1218 
1219       assert(memory->image->bindings[0].offset == 0);
1220       radv_init_metadata(device, memory->image, &metadata);
1221       device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
1222    }
1223 
1224    return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
1225 }
1226 
1227 VKAPI_ATTR void VKAPI_CALL
radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
                                 VkMemoryRequirements2 *pMemoryRequirements)
1230 {
1231    RADV_FROM_HANDLE(radv_device, device, _device);
1232    RADV_FROM_HANDLE(radv_image, image, pInfo->image);
1233 
1234    pMemoryRequirements->memoryRequirements.memoryTypeBits =
1235       ((1u << device->physical_device->memory_properties.memoryTypeCount) - 1u) &
1236       ~device->physical_device->memory_types_32bit;
1237 
1238    pMemoryRequirements->memoryRequirements.size = image->size;
1239    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
1240 
1241    vk_foreach_struct (ext, pMemoryRequirements->pNext) {
1242       switch (ext->sType) {
1243       case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
1244          VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
1245          req->requiresDedicatedAllocation = image->shareable && image->vk.tiling != VK_IMAGE_TILING_LINEAR;
1246          req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
1247          break;
1248       }
1249       default:
1250          break;
1251       }
1252    }
1253 }
1254 
1255 VKAPI_ATTR void VKAPI_CALL
radv_GetDeviceImageMemoryRequirements(VkDevice device, const VkDeviceImageMemoryRequirements *pInfo,
                                      VkMemoryRequirements2 *pMemoryRequirements)
1258 {
1259    UNUSED VkResult result;
1260    VkImage image;
1261 
1262    /* Determining the image size/alignment require to create a surface, which is complicated without
1263     * creating an image.
1264     * TODO: Avoid creating an image.
1265     */
1266    result =
1267       radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
1268    assert(result == VK_SUCCESS);
1269 
1270    VkImageMemoryRequirementsInfo2 info2 = {
1271       .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
1272       .image = image,
1273    };
1274 
1275    radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);
1276 
1277    radv_DestroyImage(device, image, NULL);
1278 }
1279 
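/* For 3D views the "layer count" is the depth of the view's extent; for array views it is the
 * last accessible layer (base layer + layer count).
 */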
1280 static uint32_t
radv_surface_max_layer_count(struct radv_image_view *iview)
1282 {
1283    return iview->vk.view_type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
1284                                                        : (iview->vk.base_array_layer + iview->vk.layer_count);
1285 }
1286 
1287 unsigned
radv_get_dcc_max_uncompressed_block_size(const struct radv_device *device, const struct radv_image *image)
1289 {
1290    if (device->physical_device->rad_info.gfx_level < GFX10 && image->vk.samples > 1) {
1291       if (image->planes[0].surface.bpe == 1)
1292          return V_028C78_MAX_BLOCK_SIZE_64B;
1293       else if (image->planes[0].surface.bpe == 2)
1294          return V_028C78_MAX_BLOCK_SIZE_128B;
1295    }
1296 
1297    return V_028C78_MAX_BLOCK_SIZE_256B;
1298 }
1299 
1300 static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
1302 {
1303    if (!device->physical_device->rad_info.has_dedicated_vram) {
1304       /* amdvlk: [min-compressed-block-size] should be set to 32 for
1305        * dGPU and 64 for APU because all of our APUs to date use
1306        * DIMMs which have a request granularity size of 64B while all
1307        * other chips have a 32B request size.
1308        */
1309       return V_028C78_MIN_BLOCK_SIZE_64B;
1310    }
1311 
1312    return V_028C78_MIN_BLOCK_SIZE_32B;
1313 }
1314 
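/* Compute the per-color-buffer DCC control register value for a view. On GFX9+ the compressed
 * block sizes and independent-block settings come from ac_surface; on older generations
 * independent 64B blocks and a 64B max compressed block size are used whenever the image may be
 * sampled, copied from, or used as an input attachment.
 */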
1315 static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
1317 {
1318    unsigned max_uncompressed_block_size = radv_get_dcc_max_uncompressed_block_size(device, iview->image);
1319    unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
1320    unsigned max_compressed_block_size;
1321    unsigned independent_128b_blocks;
1322    unsigned independent_64b_blocks;
1323 
1324    if (!radv_dcc_enabled(iview->image, iview->vk.base_mip_level))
1325       return 0;
1326 
1327    /* For GFX9+ ac_surface computes values for us (except min_compressed
1328     * and max_uncompressed) */
1329    if (device->physical_device->rad_info.gfx_level >= GFX9) {
1330       max_compressed_block_size = iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
1331       independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
1332       independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
1333    } else {
1334       independent_128b_blocks = 0;
1335 
1336       if (iview->image->vk.usage &
1337           (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
1338          /* If this DCC image is potentially going to be used in texture
1339           * fetches, we need some special settings.
1340           */
1341          independent_64b_blocks = 1;
1342          max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
1343       } else {
1344          /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
1345           * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
1346           * big as possible for better compression state.
1347           */
1348          independent_64b_blocks = 0;
1349          max_compressed_block_size = max_uncompressed_block_size;
1350       }
1351    }
1352 
1353    uint32_t result = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
1354                      S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
1355                      S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
1356                      S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks);
1357 
1358    if (device->physical_device->rad_info.gfx_level >= GFX11) {
1359       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX11(independent_128b_blocks) |
1360                 S_028C78_DISABLE_CONSTANT_ENCODE_REG(1) |
1361                 S_028C78_FDCC_ENABLE(radv_dcc_enabled(iview->image, iview->vk.base_mip_level));
1362 
1363       if (device->physical_device->rad_info.family >= CHIP_GFX1103_R2) {
1364          result |= S_028C78_ENABLE_MAX_COMP_FRAG_OVERRIDE(1) | S_028C78_MAX_COMP_FRAGS(iview->image->vk.samples >= 4);
1365       }
1366    } else {
1367       result |= S_028C78_INDEPENDENT_128B_BLOCKS_GFX10(independent_128b_blocks);
1368    }
1369 
1370    return result;
1371 }
1372 
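/* Fills radv_color_buffer_info with the CB_COLOR_* register values for a color
 * image view: base address and tile swizzle, tiling attributes, CMASK/FMASK/DCC
 * metadata addresses, format/swap/number type and the mip/extent fields.
 */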
1373 void
1374 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
1375                               struct radv_image_view *iview)
1376 {
1377    const struct util_format_description *desc;
1378    unsigned ntype, format, swap, endian;
1379    unsigned blend_clamp = 0, blend_bypass = 0;
1380    uint64_t va;
1381    const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
1382    const struct radeon_surf *surf = &plane->surface;
1383    uint8_t tile_swizzle = plane->surface.tile_swizzle;
1384 
1385    desc = vk_format_description(iview->vk.format);
1386 
1387    memset(cb, 0, sizeof(*cb));
1388 
1389    /* Intensity is implemented as Red, so treat it that way. */
1390    if (device->physical_device->rad_info.gfx_level >= GFX11)
1391       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX11(desc->swizzle[3] == PIPE_SWIZZLE_1);
1392    else
1393       cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1_GFX6(desc->swizzle[3] == PIPE_SWIZZLE_1);
1394 
1395    uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
1396    va = radv_buffer_get_va(iview->image->bindings[plane_id].bo) + iview->image->bindings[plane_id].offset;
1397 
1398    if (iview->nbc_view.valid) {
1399       va += iview->nbc_view.base_address_offset;
1400       tile_swizzle = iview->nbc_view.tile_swizzle;
1401    }
1402 
1403    cb->cb_color_base = va >> 8;
1404 
1405    if (device->physical_device->rad_info.gfx_level >= GFX9) {
1406       if (device->physical_device->rad_info.gfx_level >= GFX11) {
1407          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1408                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1409       } else if (device->physical_device->rad_info.gfx_level >= GFX10) {
1410          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1411                                  S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1412                                  S_028EE0_CMASK_PIPE_ALIGNED(1) |
1413                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
1414       } else {
1415          struct gfx9_surf_meta_flags meta = {
1416             .rb_aligned = 1,
1417             .pipe_aligned = 1,
1418          };
1419 
1420          if (surf->meta_offset)
1421             meta = surf->u.gfx9.color.dcc;
1422 
1423          cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
1424                                 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
1425                                 S_028C74_RB_ALIGNED(meta.rb_aligned) | S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
1426          cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
1427       }
1428 
1429       cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
1430       cb->cb_color_base |= tile_swizzle;
1431    } else {
1432       const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->vk.base_mip_level];
1433       unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
1434 
1435       cb->cb_color_base += level_info->offset_256B;
1436       if (level_info->mode == RADEON_SURF_MODE_2D)
1437          cb->cb_color_base |= tile_swizzle;
1438 
1439       pitch_tile_max = level_info->nblk_x / 8 - 1;
1440       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
1441       tile_mode_index = radv_tile_mode_index(plane, iview->vk.base_mip_level, false);
1442 
1443       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
1444       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
1445       cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
1446 
1447       cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
1448 
1449       if (radv_image_has_fmask(iview->image)) {
1450          if (device->physical_device->rad_info.gfx_level >= GFX7)
1451             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
1452          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
1453          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
1454       } else {
1455          /* This must be set for fast clear to work without FMASK. */
1456          if (device->physical_device->rad_info.gfx_level >= GFX7)
1457             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
1458          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
1459          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
1460       }
1461    }
1462 
1463    /* CMASK variables */
1464    va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1465    va += surf->cmask_offset;
1466    cb->cb_color_cmask = va >> 8;
1467 
1468    va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1469    va += surf->meta_offset;
1470 
1471    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && device->physical_device->rad_info.gfx_level <= GFX8)
1472       va += plane->surface.u.legacy.color.dcc_level[iview->vk.base_mip_level].dcc_offset;
1473 
1474    unsigned dcc_tile_swizzle = tile_swizzle;
1475    dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
1476 
1477    cb->cb_dcc_base = va >> 8;
1478    cb->cb_dcc_base |= dcc_tile_swizzle;
1479 
1480    /* GFX10 field has the same base shift as the GFX6 field. */
1481    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
1482    uint32_t slice_start = iview->nbc_view.valid ? 0 : iview->vk.base_array_layer;
1483    cb->cb_color_view = S_028C6C_SLICE_START(slice_start) | S_028C6C_SLICE_MAX_GFX10(max_slice);
1484 
1485    if (iview->image->vk.samples > 1) {
1486       unsigned log_samples = util_logbase2(iview->image->vk.samples);
1487 
1488       if (device->physical_device->rad_info.gfx_level >= GFX11)
1489          cb->cb_color_attrib |= S_028C74_NUM_FRAGMENTS_GFX11(log_samples);
1490       else
1491          cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS_GFX6(log_samples);
1492    }
1493 
1494    if (radv_image_has_fmask(iview->image)) {
1495       va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->fmask_offset;
1496       cb->cb_color_fmask = va >> 8;
1497       cb->cb_color_fmask |= surf->fmask_tile_swizzle;
1498    } else {
1499       cb->cb_color_fmask = cb->cb_color_base;
1500    }
1501 
1502    ntype = ac_get_cb_number_type(desc->format);
1503    format = ac_get_cb_format(device->physical_device->rad_info.gfx_level, desc->format);
1504    assert(format != V_028C70_COLOR_INVALID);
1505 
1506    swap = radv_translate_colorswap(iview->vk.format, false);
1507    endian = radv_colorformat_endian_swap(format);
1508 
1509    /* blend clamp should be set for all NORM/SRGB types */
1510    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM || ntype == V_028C70_NUMBER_SRGB)
1511       blend_clamp = 1;
1512 
1513    /* set blend bypass according to docs if SINT/UINT or
1514       8/24 COLOR variants */
1515    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT || format == V_028C70_COLOR_8_24 ||
1516        format == V_028C70_COLOR_24_8 || format == V_028C70_COLOR_X24_8_32_FLOAT) {
1517       blend_clamp = 0;
1518       blend_bypass = 1;
1519    }
1520 #if 0
1521 	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
1522 	    (format == V_028C70_COLOR_8 ||
1523 	     format == V_028C70_COLOR_8_8 ||
1524 	     format == V_028C70_COLOR_8_8_8_8))
1525 		->color_is_int8 = true;
1526 #endif
1527    cb->cb_color_info = S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
1528                        S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
1529                        S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
1530                                            ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
1531                                            format != V_028C70_COLOR_24_8) |
1532                        S_028C70_NUMBER_TYPE(ntype);
1533 
1534    if (device->physical_device->rad_info.gfx_level >= GFX11)
1535       cb->cb_color_info |= S_028C70_FORMAT_GFX11(format);
1536    else
1537       cb->cb_color_info |= S_028C70_FORMAT_GFX6(format) | S_028C70_ENDIAN(endian);
1538 
1539    if (radv_image_has_fmask(iview->image)) {
1540       cb->cb_color_info |= S_028C70_COMPRESSION(1);
1541       if (device->physical_device->rad_info.gfx_level == GFX6) {
1542          unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
1543          cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
1544       }
1545 
1546       if (radv_image_is_tc_compat_cmask(iview->image)) {
1547          /* Allow the texture block to read FMASK directly without decompressing it. */
1548          cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
1549 
1550          if (device->physical_device->rad_info.gfx_level == GFX8) {
1551             /* Set CMASK into a tiling format that allows
1552              * the texture block to read it.
1553              */
1554             cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
1555          }
1556       }
1557    }
1558 
1559    if (radv_image_has_cmask(iview->image) && !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
1560       cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
1561 
1562    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level) && !iview->disable_dcc_mrt &&
1563        device->physical_device->rad_info.gfx_level < GFX11)
1564       cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
1565 
1566    cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
1567 
1568    /* This must be set for fast clear to work without FMASK. */
1569    if (!radv_image_has_fmask(iview->image) && device->physical_device->rad_info.gfx_level == GFX6) {
1570       unsigned bankh = util_logbase2(surf->u.legacy.bankh);
1571       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
1572    }
1573 
1574    if (device->physical_device->rad_info.gfx_level >= GFX9) {
1575       unsigned mip0_depth = iview->image->vk.image_type == VK_IMAGE_TYPE_3D ? (iview->extent.depth - 1)
1576                                                                             : (iview->image->vk.array_layers - 1);
1577       unsigned width = vk_format_get_plane_width(iview->image->vk.format, iview->plane_id, iview->extent.width);
1578       unsigned height = vk_format_get_plane_height(iview->image->vk.format, iview->plane_id, iview->extent.height);
1579       unsigned max_mip = iview->image->vk.mip_levels - 1;
1580 
1581       if (device->physical_device->rad_info.gfx_level >= GFX10) {
1582          unsigned base_level = iview->vk.base_mip_level;
1583 
1584          if (iview->nbc_view.valid) {
1585             base_level = iview->nbc_view.level;
1586             max_mip = iview->nbc_view.num_levels - 1;
1587          }
1588 
1589          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(base_level);
1590 
1591          cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) | S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
1592                                  S_028EE0_RESOURCE_LEVEL(device->physical_device->rad_info.gfx_level >= GFX11 ? 0 : 1);
1593       } else {
1594          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->vk.base_mip_level);
1595          cb->cb_color_attrib |= S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
1596       }
1597 
1598       /* GFX10.3+ can set a custom pitch for 1D and 2D non-array, but it must be a multiple
1599        * of 256B. Only set it for 2D linear for multi-GPU interop.
1600        *
1601        * We set the pitch in MIP0_WIDTH.
1602        */
1603       if (device->physical_device->rad_info.gfx_level >= GFX10_3 && iview->image->vk.image_type == VK_IMAGE_TYPE_2D &&
1604           iview->image->vk.array_layers == 1 && plane->surface.is_linear) {
1605          assert((plane->surface.u.gfx9.surf_pitch * plane->surface.bpe) % 256 == 0);
1606 
1607          width = plane->surface.u.gfx9.surf_pitch;
1608 
1609          /* Subsampled images have the pitch in the units of blocks. */
1610          if (plane->surface.blk_w == 2)
1611             width *= 2;
1612       }
1613 
1614       cb->cb_color_attrib2 =
1615          S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) | S_028C68_MAX_MIP(max_mip);
1616    }
1617 }
1618 
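/* Computes the DECOMPRESS_ON_N_ZPLANES value for TC-compatible HTILE, i.e. how
 * many Z planes the DB keeps before decompressing; depends on the depth format,
 * the sample count and an ITERATE_256 hardware bug workaround.
 */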
1619 static unsigned
1620 radv_calc_decompress_on_z_planes(const struct radv_device *device, struct radv_image_view *iview)
1621 {
1622    unsigned max_zplanes = 0;
1623 
1624    assert(radv_image_is_tc_compat_htile(iview->image));
1625 
1626    if (device->physical_device->rad_info.gfx_level >= GFX9) {
1627       /* Default value for 32-bit depth surfaces. */
1628       max_zplanes = 4;
1629 
1630       if (iview->vk.format == VK_FORMAT_D16_UNORM && iview->image->vk.samples > 1)
1631          max_zplanes = 2;
1632 
1633       /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
1634       if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
1635           radv_image_get_iterate256(device, iview->image) && !radv_image_tile_stencil_disabled(device, iview->image) &&
1636           iview->image->vk.samples == 4) {
1637          max_zplanes = 1;
1638       }
1639 
1640       max_zplanes = max_zplanes + 1;
1641    } else {
1642       if (iview->vk.format == VK_FORMAT_D16_UNORM) {
1643          /* Do not enable Z plane compression for 16-bit depth
1644           * surfaces because it isn't supported on GFX8. Only
1645           * 32-bit depth surfaces are supported by the hardware.
1646           * This allows us to maintain shader compatibility and to
1647           * reduce the number of depth decompressions.
1648           */
1649          max_zplanes = 1;
1650       } else {
1651          if (iview->image->vk.samples <= 1)
1652             max_zplanes = 5;
1653          else if (iview->image->vk.samples <= 4)
1654             max_zplanes = 3;
1655          else
1656             max_zplanes = 2;
1657       }
1658    }
1659 
1660    return max_zplanes;
1661 }
1662 
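/* Initializes the depth-stencil registers for the dedicated VRS image: a
 * D16_UNORM surface whose HTILE buffer holds the 4-bit VRS rate encoding.
 */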
1663 void
1664 radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer, struct radv_ds_buffer_info *ds)
1665 {
1666    const struct radeon_surf *surf = &image->planes[0].surface;
1667 
1668    assert(image->vk.format == VK_FORMAT_D16_UNORM);
1669    memset(ds, 0, sizeof(*ds));
1670 
1671    ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) | S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
1672                    S_028038_ZRANGE_PRECISION(1) | S_028038_TILE_SURFACE_ENABLE(1);
1673    ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
1674 
1675    ds->db_depth_size = S_02801C_X_MAX(image->vk.extent.width - 1) | S_02801C_Y_MAX(image->vk.extent.height - 1);
1676 
1677    ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
1678    ds->db_htile_surface =
1679       S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) | S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
1680 }
1681 
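/* Fills radv_ds_buffer_info with the DB_* register values for a depth/stencil
 * image view: Z/stencil base addresses, format and tiling, per-aspect read-only
 * bits and, when HTILE is enabled, the metadata surface setup.
 */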
1682 void
1683 radv_initialise_ds_surface(const struct radv_device *device, struct radv_ds_buffer_info *ds,
1684                            struct radv_image_view *iview, VkImageAspectFlags ds_aspects)
1685 {
1686    unsigned level = iview->vk.base_mip_level;
1687    unsigned format, stencil_format;
1688    uint64_t va, s_offs, z_offs;
1689    bool stencil_only = iview->image->vk.format == VK_FORMAT_S8_UINT;
1690    const struct radv_image_plane *plane = &iview->image->planes[0];
1691    const struct radeon_surf *surf = &plane->surface;
1692 
1693    assert(vk_format_get_plane_count(iview->image->vk.format) == 1);
1694 
1695    memset(ds, 0, sizeof(*ds));
1696 
1697    format = radv_translate_dbformat(iview->image->vk.format);
1698    stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
1699 
1700    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
1701    ds->db_depth_view = S_028008_SLICE_START(iview->vk.base_array_layer) | S_028008_SLICE_MAX(max_slice) |
1702                        S_028008_Z_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) |
1703                        S_028008_STENCIL_READ_ONLY(!(ds_aspects & VK_IMAGE_ASPECT_STENCIL_BIT));
1704    if (device->physical_device->rad_info.gfx_level >= GFX10) {
1705       ds->db_depth_view |=
1706          S_028008_SLICE_START_HI(iview->vk.base_array_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
1707    }
1708 
1709    ds->db_htile_data_base = 0;
1710    ds->db_htile_surface = 0;
1711 
1712    va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset;
1713    s_offs = z_offs = va;
1714 
1715    /* Recommended value for better performance with 4x and 8x. */
1716    ds->db_render_override2 = S_028010_DECOMPRESS_Z_ON_FLUSH(iview->image->vk.samples >= 4) |
1717                              S_028010_CENTROID_COMPUTATION_MODE(device->physical_device->rad_info.gfx_level >= GFX10_3);
1718 
1719    if (device->physical_device->rad_info.gfx_level >= GFX9) {
1720       assert(surf->u.gfx9.surf_offset == 0);
1721       s_offs += surf->u.gfx9.zs.stencil_offset;
1722 
1723       ds->db_z_info = S_028038_FORMAT(format) | S_028038_NUM_SAMPLES(util_logbase2(iview->image->vk.samples)) |
1724                       S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) | S_028038_MAXMIP(iview->image->vk.mip_levels - 1) |
1725                       S_028038_ZRANGE_PRECISION(1) |
1726                       S_028040_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
1727       ds->db_stencil_info = S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode) |
1728                             S_028044_ITERATE_256(device->physical_device->rad_info.gfx_level >= GFX11);
1729 
1730       if (device->physical_device->rad_info.gfx_level == GFX9) {
1731          ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
1732          ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
1733       }
1734 
1735       ds->db_depth_view |= S_028008_MIPID(level);
1736       ds->db_depth_size =
1737          S_02801C_X_MAX(iview->image->vk.extent.width - 1) | S_02801C_Y_MAX(iview->image->vk.extent.height - 1);
1738 
1739       if (radv_htile_enabled(iview->image, level)) {
1740          ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
1741 
1742          if (radv_image_is_tc_compat_htile(iview->image)) {
1743             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
1744 
1745             ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1746 
1747             if (device->physical_device->rad_info.gfx_level >= GFX10) {
1748                bool iterate256 = radv_image_get_iterate256(device, iview->image);
1749 
1750                ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
1751                ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
1752                ds->db_z_info |= S_028040_ITERATE_256(iterate256);
1753                ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
1754             } else {
1755                ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
1756                ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
1757             }
1758          }
1759 
1760          if (radv_image_tile_stencil_disabled(device, iview->image)) {
1761             ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
1762          }
1763 
1764          va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
1765          ds->db_htile_data_base = va >> 8;
1766          ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
1767 
1768          if (device->physical_device->rad_info.gfx_level == GFX9) {
1769             ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
1770          }
1771 
1772          if (radv_image_has_vrs_htile(device, iview->image)) {
1773             ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
1774          }
1775       }
1776 
1777       if (device->physical_device->rad_info.gfx_level >= GFX11) {
1778          radv_gfx11_set_db_render_control(device, iview->image->vk.samples, &ds->db_render_control);
1779       }
1780    } else {
1781       const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
1782 
1783       if (stencil_only)
1784          level_info = &surf->u.legacy.zs.stencil_level[level];
1785 
1786       z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
1787       s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;
1788 
1789       ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
1790       ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
1791       ds->db_stencil_info = S_028044_FORMAT(stencil_format);
1792 
1793       if (iview->image->vk.samples > 1)
1794          ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->vk.samples));
1795 
1796       if (device->physical_device->rad_info.gfx_level >= GFX7) {
1797          const struct radeon_info *info = &device->physical_device->rad_info;
1798          unsigned tiling_index = surf->u.legacy.tiling_index[level];
1799          unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
1800          unsigned macro_index = surf->u.legacy.macro_tile_index;
1801          unsigned tile_mode = info->si_tile_mode_array[tiling_index];
1802          unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
1803          unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
1804 
1805          if (stencil_only)
1806             tile_mode = stencil_tile_mode;
1807 
1808          ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
1809                               S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
1810                               S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
1811                               S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
1812                               S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
1813                               S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
1814          ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
1815          ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
1816       } else {
1817          unsigned tile_mode_index = radv_tile_mode_index(&iview->image->planes[0], level, false);
1818          ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1819          tile_mode_index = radv_tile_mode_index(&iview->image->planes[0], level, true);
1820          ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
1821          if (stencil_only)
1822             ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
1823       }
1824 
1825       ds->db_depth_size =
1826          S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) | S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
1827       ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
1828 
1829       if (radv_htile_enabled(iview->image, level)) {
1830          ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
1831 
1832          if (radv_image_tile_stencil_disabled(device, iview->image)) {
1833             ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
1834          }
1835 
1836          va = radv_buffer_get_va(iview->image->bindings[0].bo) + iview->image->bindings[0].offset + surf->meta_offset;
1837          ds->db_htile_data_base = va >> 8;
1838          ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
1839 
1840          if (radv_image_is_tc_compat_htile(iview->image)) {
1841             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
1842 
1843             ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
1844             ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
1845          }
1846       }
1847    }
1848 
1849    ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
1850    ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
1851 }
1852 
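/* GFX11: programs MAX_ALLOWED_TILES_IN_WAVE in DB_RENDER_CONTROL for MSAA; the
 * value depends on the sample count and on whether the device has dedicated VRAM.
 */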
1853 void
1854 radv_gfx11_set_db_render_control(const struct radv_device *device, unsigned num_samples, unsigned *db_render_control)
1855 {
1856    const struct radv_physical_device *pdevice = device->physical_device;
1857    unsigned max_allowed_tiles_in_wave = 0;
1858 
1859    if (pdevice->rad_info.has_dedicated_vram) {
1860       if (num_samples == 8)
1861          max_allowed_tiles_in_wave = 6;
1862       else if (num_samples == 4)
1863          max_allowed_tiles_in_wave = 13;
1864       else
1865          max_allowed_tiles_in_wave = 0;
1866    } else {
1867       if (num_samples == 8)
1868          max_allowed_tiles_in_wave = 7;
1869       else if (num_samples == 4)
1870          max_allowed_tiles_in_wave = 15;
1871       else
1872          max_allowed_tiles_in_wave = 0;
1873    }
1874 
1875    *db_render_control |= S_028000_MAX_ALLOWED_TILES_IN_WAVE(max_allowed_tiles_in_wave);
1876 }
1877 
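/* Exports a device memory allocation as an opaque or dma-buf file descriptor. */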
1878 VKAPI_ATTR VkResult VKAPI_CALL
1879 radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
1880 {
1881    RADV_FROM_HANDLE(radv_device, device, _device);
1882    RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);
1883 
1884    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
1885 
1886    /* At the moment, we support only the below handle types. */
1887    assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
1888           pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
1889 
1890    bool ret = radv_get_memory_fd(device, memory, pFD);
1891    if (ret == false)
1892       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1893    return VK_SUCCESS;
1894 }
1895 
1896 static uint32_t
1897 radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev, enum radeon_bo_domain domains,
1898                                         enum radeon_bo_flag flags, enum radeon_bo_flag ignore_flags)
1899 {
1900    /* Don't count GTT/CPU as relevant:
1901     *
1902     * - We're not fully consistent between the two.
1903     * - Sometimes VRAM gets VRAM|GTT.
1904     */
1905    const enum radeon_bo_domain relevant_domains = RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
1906    uint32_t bits = 0;
1907    for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
1908       if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
1909          continue;
1910 
1911       if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
1912          continue;
1913 
1914       bits |= 1u << i;
1915    }
1916 
1917    return bits;
1918 }
1919 
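/* Maps the BO domains/flags of an imported buffer to Vulkan memoryTypeBits.
 * Starts with a strict flag match and progressively ignores GTT_WC and then
 * NO_CPU_ACCESS until at least one memory type matches.
 */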
1920 static uint32_t
1921 radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
1922                                 enum radeon_bo_flag flags)
1923 {
1924    enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
1925    uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1926 
1927    if (!bits) {
1928       ignore_flags |= RADEON_FLAG_GTT_WC;
1929       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1930    }
1931 
1932    if (!bits) {
1933       ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
1934       bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
1935    }
1936 
1937    /* Avoid 32-bit memory types for shared memory. */
1938    bits &= ~dev->memory_types_32bit;
1939 
1940    return bits;
1941 }
1942 VKAPI_ATTR VkResult VKAPI_CALL
1943 radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, int fd,
1944                               VkMemoryFdPropertiesKHR *pMemoryFdProperties)
1945 {
1946    RADV_FROM_HANDLE(radv_device, device, _device);
1947 
1948    switch (handleType) {
1949    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
1950       enum radeon_bo_domain domains;
1951       enum radeon_bo_flag flags;
1952       if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
1953          return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1954 
1955       pMemoryFdProperties->memoryTypeBits = radv_compute_valid_memory_types(device->physical_device, domains, flags);
1956       return VK_SUCCESS;
1957    }
1958    default:
1959       /* The valid usage section for this function says:
1960        *
1961        *    "handleType must not be one of the handle types defined as
1962        *    opaque."
1963        *
1964        * So opaque handle types fall into the default "unsupported" case.
1965        */
1966       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
1967    }
1968 }
1969 
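/* Samples the requested time domains (GPU timestamp, CLOCK_MONOTONIC and, where
 * available, CLOCK_MONOTONIC_RAW) and reports the maximum deviation computed
 * from the sampling window and the largest clock period.
 */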
1970 VKAPI_ATTR VkResult VKAPI_CALL
1971 radv_GetCalibratedTimestampsKHR(VkDevice _device, uint32_t timestampCount,
1972                                 const VkCalibratedTimestampInfoKHR *pTimestampInfos, uint64_t *pTimestamps,
1973                                 uint64_t *pMaxDeviation)
1974 {
1975 #ifndef _WIN32
1976    RADV_FROM_HANDLE(radv_device, device, _device);
1977    uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
1978    int d;
1979    uint64_t begin, end;
1980    uint64_t max_clock_period = 0;
1981 
1982 #ifdef CLOCK_MONOTONIC_RAW
1983    begin = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
1984 #else
1985    begin = vk_clock_gettime(CLOCK_MONOTONIC);
1986 #endif
1987 
1988    for (d = 0; d < timestampCount; d++) {
1989       switch (pTimestampInfos[d].timeDomain) {
1990       case VK_TIME_DOMAIN_DEVICE_KHR:
1991          pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
1992          uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
1993          max_clock_period = MAX2(max_clock_period, device_period);
1994          break;
1995       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_KHR:
1996          pTimestamps[d] = vk_clock_gettime(CLOCK_MONOTONIC);
1997          max_clock_period = MAX2(max_clock_period, 1);
1998          break;
1999 
2000 #ifdef CLOCK_MONOTONIC_RAW
2001       case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_KHR:
2002          pTimestamps[d] = begin;
2003          break;
2004 #endif
2005       default:
2006          pTimestamps[d] = 0;
2007          break;
2008       }
2009    }
2010 
2011 #ifdef CLOCK_MONOTONIC_RAW
2012    end = vk_clock_gettime(CLOCK_MONOTONIC_RAW);
2013 #else
2014    end = vk_clock_gettime(CLOCK_MONOTONIC);
2015 #endif
2016 
2017    *pMaxDeviation = vk_time_max_deviation(begin, end, max_clock_period);
2018 
2019    return VK_SUCCESS;
2020 #else
2021    return VK_ERROR_FEATURE_NOT_PRESENT;
2022 #endif
2023 }
2024 
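/* Switches the device between the default and the peak power state when stable
 * pstates are supported; used while performance counters are acquired.
 */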
2025 bool
2026 radv_device_set_pstate(struct radv_device *device, bool enable)
2027 {
2028    struct radeon_winsys *ws = device->ws;
2029    enum radeon_ctx_pstate pstate = enable ? RADEON_CTX_PSTATE_PEAK : RADEON_CTX_PSTATE_NONE;
2030 
2031    if (device->physical_device->rad_info.has_stable_pstate) {
2032       /* pstate is per-device; setting it for one ctx is sufficient.
2033        * We pick the first initialized one below. */
2034       for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++)
2035          if (device->hw_ctx[i])
2036             return ws->ctx_set_pstate(device->hw_ctx[i], pstate) >= 0;
2037    }
2038 
2039    return true;
2040 }
2041 
2042 bool
2043 radv_device_acquire_performance_counters(struct radv_device *device)
2044 {
2045    bool result = true;
2046    simple_mtx_lock(&device->pstate_mtx);
2047 
2048    if (device->pstate_cnt == 0) {
2049       result = radv_device_set_pstate(device, true);
2050       if (result)
2051          ++device->pstate_cnt;
2052    }
2053 
2054    simple_mtx_unlock(&device->pstate_mtx);
2055    return result;
2056 }
2057 
2058 void
2059 radv_device_release_performance_counters(struct radv_device *device)
2060 {
2061    simple_mtx_lock(&device->pstate_mtx);
2062 
2063    if (--device->pstate_cnt == 0)
2064       radv_device_set_pstate(device, false);
2065 
2066    simple_mtx_unlock(&device->pstate_mtx);
2067 }
2068 
2069 VKAPI_ATTR VkResult VKAPI_CALL
2070 radv_AcquireProfilingLockKHR(VkDevice _device, const VkAcquireProfilingLockInfoKHR *pInfo)
2071 {
2072    RADV_FROM_HANDLE(radv_device, device, _device);
2073    bool result = radv_device_acquire_performance_counters(device);
2074    return result ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2075 }
2076 
2077 VKAPI_ATTR void VKAPI_CALL
2078 radv_ReleaseProfilingLockKHR(VkDevice _device)
2079 {
2080    RADV_FROM_HANDLE(radv_device, device, _device);
2081    radv_device_release_performance_counters(device);
2082 }
2083 
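/* Creates a transient internal image from the given create info to query the
 * subresource layout, then destroys it again.
 */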
2084 VKAPI_ATTR void VKAPI_CALL
2085 radv_GetDeviceImageSubresourceLayoutKHR(VkDevice device, const VkDeviceImageSubresourceInfoKHR *pInfo,
2086                                         VkSubresourceLayout2KHR *pLayout)
2087 {
2088    UNUSED VkResult result;
2089    VkImage image;
2090 
2091    result =
2092       radv_image_create(device, &(struct radv_image_create_info){.vk_info = pInfo->pCreateInfo}, NULL, &image, true);
2093    assert(result == VK_SUCCESS);
2094 
2095    radv_GetImageSubresourceLayout2KHR(device, image, pInfo->pSubresource, pLayout);
2096 
2097    radv_DestroyImage(device, image, NULL);
2098 }
2099