/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#elif !defined(_WIN32)
#include <sys/sysmacros.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vulkan/vk_icd.h"

#ifdef LLVM_AVAILABLE
#include "ac_llvm_util.h"
#endif

/* The number of IBs per submit isn't infinite: it depends on the ring type
 * (i.e., some initial setup is needed for a submit) and each IB costs 4 DW.
 * This limit is arbitrary but should be safe for now.  Ideally, we should get
 * this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

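/*
 * Illustrative sketch only (not part of the driver): a command stream with
 * more IBs than RADV_MAX_IBS_PER_SUBMIT would have to be split into several
 * kernel submissions.  DIV_ROUND_UP comes from util/macros.h.
 */
#if 0
static unsigned
radv_example_num_submits(unsigned ib_count)
{
   /* e.g. 500 IBs -> DIV_ROUND_UP(500, 192) = 3 submissions */
   return DIV_ROUND_UP(ib_count, RADV_MAX_IBS_PER_SUBMIT);
}
#endif
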
/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,
                                                                  struct radv_timeline *timeline,
                                                                  uint64_t p);

static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                                 struct list_head *processing_list);

static void radv_destroy_semaphore_part(struct radv_device *device,
                                        struct radv_semaphore_part *part);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

static uint64_t
radv_get_absolute_timeout(uint64_t timeout)
{
   if (timeout == UINT64_MAX) {
      return timeout;
   } else {
      uint64_t current_time = radv_get_current_time();

      timeout = MIN2(UINT64_MAX - current_time, timeout);

      return current_time + timeout;
   }
}

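/*
 * Worked example (illustrative): if current_time is UINT64_MAX - 100 and the
 * caller passes a relative timeout of 1000 ns, MIN2 clamps it to 100, so the
 * absolute deadline is UINT64_MAX instead of wrapping around zero.
 */
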
static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx)
#ifdef LLVM_AVAILABLE
       || !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx)
#endif
   )
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}

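/*
 * Illustrative numbers: on a dGPU with 8 GiB of VRAM and a 256 MiB BAR,
 * radv_get_visible_vram_size() reports 256 MiB and radv_get_vram_size()
 * reports the remaining invisible 8 GiB - 256 MiB.  The override_vram_size
 * drirc option (in MiB) can only shrink these sizes, never grow them.
 */
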
enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = device->rad_info.gart_size;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
       * greater than it. To work around this, we compute the total available memory size (GTT +
       * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
       */
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }

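   /*
    * Worked example (illustrative): an APU with 3 GiB of GTT and a 1 GiB
    * visible carveout has total_size = 4 GiB; we then advertise ~2.67 GiB
    * as "VRAM" and ~1.33 GiB as GTT, even though the real carveout is only
    * 1 GiB.
    */
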
   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

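   /* Illustrative: with a 256 MiB BAR and a 16 MiB invisible remainder,
    * 16 * 9 = 144 < 256, so no separate invisible-VRAM heap is advertised. */
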
   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags &
              (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}

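/*
 * Illustrative end result on a typical dGPU (sketch, not exhaustive):
 *   type 0: VRAM heap,          DEVICE_LOCAL
 *   type 1: GTT heap,           HOST_VISIBLE | HOST_COHERENT
 *   type 2: visible-VRAM heap,  DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT
 *   type 3: GTT heap,           HOST_VISIBLE | HOST_COHERENT | HOST_CACHED
 * plus DEVICE_COHERENT/DEVICE_UNCACHED variants of these on chips with
 * L2-uncached support.
 */
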
static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives a 2-5% performance improvement with
       * SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

#ifdef LLVM_AVAILABLE
   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
#else
   unreachable("LLVM is not available");
#endif
}

int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}

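/*
 * Illustrative usage: with RADV_THREAD_TRACE=4096 in the environment,
 * radv_get_int_debug_option("RADV_THREAD_TRACE", -1) returns 4096; if the
 * variable is unset or contains no digits, it falls back to -1.
 */
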
static bool
radv_thread_trace_enabled()
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) ||                    \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = !!(device->instance->perftest_flags & RADV_PERFTEST_RT),
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = (device->instance->perftest_flags & RADV_PERFTEST_RT) && !device->use_llvm,
      .KHR_push_descriptor = true,
      .KHR_ray_tracing_pipeline = (device->instance->perftest_flags & RADV_PERFTEST_RT) && !device->use_llvm,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.chip_class < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
      .EXT_shader_atomic_float = true,
#ifdef LLVM_AVAILABLE
      .EXT_shader_atomic_float2 = !device->use_llvm || LLVM_VERSION_MAJOR >= 14,
#else
      .EXT_shader_atomic_float2 = true,
#endif
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = !device->use_llvm,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = true,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_info = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not open device '%s'", path);

         return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not get the kernel driver version for device '%s'", path);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",
                          path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

         return VK_ERROR_INCOMPATIBLE_DRIVER;
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         radv_logi("Found compatible device '%s'.", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, false);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <
                0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;
#ifndef LLVM_AVAILABLE
   if (device->use_llvm) {
      fprintf(stderr, "ERROR: LLVM compiler backend selected for radv, but LLVM support was not "
                      "enabled at build time.\n");
      abort();
   }
#endif

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* The gpu id is already embedded in the uuid so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, 0);
#endif

   if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = device->rad_info.chip_class >= GFX10 &&
                     device->rad_info.family != CHIP_NAVI14 &&
                     !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

   device->use_ngg_culling =
      device->use_ngg &&
      device->rad_info.max_render_backends > 1 &&
      (device->rad_info.chip_class >= GFX10_3 ||
       (device->instance->perftest_flags & RADV_PERFTEST_NGGC)) &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_NGGC);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;

   if (device->rad_info.chip_class >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

   radv_get_nir_options(device);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_disk_cache;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_disk_cache;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_disk_cache;
   }

   *device_out = device;

   return VK_SUCCESS;

fail_disk_cache:
   disk_cache_destroy(device->disk_cache);
#ifdef ENABLE_SHADER_CACHE
fail_wsi:
#endif
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {"noatocdithering", RADV_DEBUG_NO_ATOC_DITHERING},
   {"nonggc", RADV_DEBUG_NO_NGGC},
   {"prologs", RADV_DEBUG_DUMP_PROLOGS},
   {NULL, 0}};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},
                                                             {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
                                                             {"bolist", RADV_PERFTEST_BO_LIST},
                                                             {"cswave32", RADV_PERFTEST_CS_WAVE_32},
                                                             {"pswave32", RADV_PERFTEST_PS_WAVE_32},
                                                             {"gewave32", RADV_PERFTEST_GE_WAVE_32},
                                                             {"nosam", RADV_PERFTEST_NO_SAM},
                                                             {"sam", RADV_PERFTEST_SAM},
                                                             {"rt", RADV_PERFTEST_RT},
                                                             {"nggc", RADV_PERFTEST_NGGC},
                                                             {"force_emulate_rt", RADV_PERFTEST_FORCE_EMULATE_RT},
                                                             {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_zero_vram"))
      instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->report_apu_as_dgpu =
      driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");
}

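/*
 * Illustrative drirc snippet (standard driconf syntax; the application name
 * and executable are made up) showing how one of the options parsed above
 * can be set per-application:
 *
 *   <driconf>
 *     <device driver="radv">
 *       <application name="Foo" executable="foo">
 *         <option name="radv_zero_vram" value="true" />
 *       </application>
 *     </device>
 *   </driconf>
 */
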
VkResult
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &wsi_instance_entrypoints, false);
   result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }

   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Created an instance");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that allows testing the compiler without having an
       * AMDGPU instance.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }
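
   /* Illustrative (the exact family spelling is an assumption): running
    * e.g. RADV_FORCE_FAMILY=NAVI10 vulkaninfo enumerates this null device
    * without any real GPU present. */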

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Found %d drm nodes", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success */
   return result;
}

VkResult
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VkResult
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

void
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice),
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = true;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

void
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = !pdevice->use_ngg_streamout;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
            (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
         features->shaderDemoteToHelperInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
            (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
         features->subgroupSizeControl = true;
         features->computeFullSubgroups = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
         VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
            (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
         features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = false;
         features->bresenhamLines = true;
         features->smoothLines = false;
1412          features->stippledRectangularLines = false;
1413          /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
1414           * but work on Raven.
1415           */
1416          features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
1417          features->stippledSmoothLines = false;
1418          break;
1419       }
1420       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
1421          VkDeviceMemoryOverallocationCreateInfoAMD *features =
1422             (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
1423          features->overallocationBehavior = true;
1424          break;
1425       }
1426       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1427          VkPhysicalDeviceRobustness2FeaturesEXT *features =
1428             (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
1429          features->robustBufferAccess2 = true;
1430          features->robustImageAccess2 = true;
1431          features->nullDescriptor = true;
1432          break;
1433       }
1434       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1435          VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
1436             (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
1437          features->customBorderColors = true;
1438          features->customBorderColorWithoutFormat = true;
1439          break;
1440       }
1441       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
1442          VkPhysicalDevicePrivateDataFeaturesEXT *features =
1443             (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
1444          features->privateData = true;
1445          break;
1446       }
1447       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
1448          VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
1449             (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
1450          features->pipelineCreationCacheControl = true;
1451          break;
1452       }
1453       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
1454          VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
1455             (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
1456          features->extendedDynamicState = true;
1457          break;
1458       }
1459       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
1460          VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
1461             (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
1462          features->robustImageAccess = true;
1463          break;
1464       }
1465       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
1466          VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
1467             (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
1468          features->shaderBufferFloat32Atomics = true;
1469          features->shaderBufferFloat32AtomicAdd = false;
1470          features->shaderBufferFloat64Atomics = true;
1471          features->shaderBufferFloat64AtomicAdd = false;
1472          features->shaderSharedFloat32Atomics = true;
1473          features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8;
1474          features->shaderSharedFloat64Atomics = true;
1475          features->shaderSharedFloat64AtomicAdd = false;
1476          features->shaderImageFloat32Atomics = true;
1477          features->shaderImageFloat32AtomicAdd = false;
1478          features->sparseImageFloat32Atomics = true;
1479          features->sparseImageFloat32AtomicAdd = false;
1480          break;
1481       }
1482       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
1483          VkPhysicalDevice4444FormatsFeaturesEXT *features =
1484             (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
1485          features->formatA4R4G4B4 = true;
1486          features->formatA4B4G4R4 = true;
1487          break;
1488       }
1489       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
1490          VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
1491             (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
1492          features->shaderTerminateInvocation = true;
1493          break;
1494       }
1495       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
1496          VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
1497             (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
1498          features->shaderImageInt64Atomics = true;
1499          features->sparseImageInt64Atomics = true;
1500          break;
1501       }
1502       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
1503          VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
1504             (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
1505          features->mutableDescriptorType = true;
1506          break;
1507       }
1508       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
1509          VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
1510             (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
1511          features->pipelineFragmentShadingRate = true;
1512          features->primitiveFragmentShadingRate = true;
1513          features->attachmentFragmentShadingRate = true;
1514          break;
1515       }
1516       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
1517          VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
1518             (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
1519          features->workgroupMemoryExplicitLayout = true;
1520          features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
1521          features->workgroupMemoryExplicitLayout8BitAccess = true;
1522          features->workgroupMemoryExplicitLayout16BitAccess = true;
1523          break;
1524       }
1525       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
1526          VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
1527             (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
1528          features->shaderZeroInitializeWorkgroupMemory = true;
1529          break;
1530       }
1531       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
1532          VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
1533             (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
1534          features->provokingVertexLast = true;
1535          features->transformFeedbackPreservesProvokingVertex = true;
1536          break;
1537       }
1538       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
1539          VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
1540             (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
1541          features->extendedDynamicState2 = true;
1542          features->extendedDynamicState2LogicOp = true;
1543          features->extendedDynamicState2PatchControlPoints = false;
1544          break;
1545       }
1546       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
1547          VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
1548             (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
1549          features->globalPriorityQuery = true;
1550          break;
1551       }
1552       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
1553          VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
1554             (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
1555          features->accelerationStructure = true;
1556          features->accelerationStructureCaptureReplay = false;
1557          features->accelerationStructureIndirectBuild = false;
1558          features->accelerationStructureHostCommands = true;
1559          features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
1560          break;
1561       }
1562       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
1563          VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
1564             (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
1565          features->shaderSubgroupUniformControlFlow = true;
1566          break;
1567       }
1568       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
1569          VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
1570          features->multiDraw = true;
1571          break;
1572       }
1573       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
1574          VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
1575             (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
1576          features->colorWriteEnable = true;
1577          break;
1578       }
1579       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
1580          VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features =
1581             (VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *)ext;
1582          bool has_shader_buffer_float_minmax = ((pdevice->rad_info.chip_class == GFX6 ||
1583                                                  pdevice->rad_info.chip_class == GFX7) &&
1584                                                 !pdevice->use_llvm) ||
1585                                                pdevice->rad_info.chip_class >= GFX10;
1586          bool has_shader_image_float_minmax = pdevice->rad_info.chip_class != GFX8 &&
1587                                               pdevice->rad_info.chip_class != GFX9;
1588          features->shaderBufferFloat16Atomics = false;
1589          features->shaderBufferFloat16AtomicAdd = false;
1590          features->shaderBufferFloat16AtomicMinMax = false;
1591          features->shaderBufferFloat32AtomicMinMax = has_shader_buffer_float_minmax;
1592          features->shaderBufferFloat64AtomicMinMax = has_shader_buffer_float_minmax;
1593          features->shaderSharedFloat16Atomics = false;
1594          features->shaderSharedFloat16AtomicAdd = false;
1595          features->shaderSharedFloat16AtomicMinMax = false;
1596          features->shaderSharedFloat32AtomicMinMax = true;
1597          features->shaderSharedFloat64AtomicMinMax = true;
1598          features->shaderImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1599          features->sparseImageFloat32AtomicMinMax = has_shader_image_float_minmax;
1600          break;
1601       }
1602       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
1603          VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
1604             (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
1605          features->primitiveTopologyListRestart = true;
1606          features->primitiveTopologyPatchListRestart = false;
1607          break;
1608       }
1609       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_FEATURES_KHR: {
1610          VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *features =
1611             (VkPhysicalDeviceShaderIntegerDotProductFeaturesKHR *)ext;
1612          features->shaderIntegerDotProduct = true;
1613          break;
1614       }
1615       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR: {
1616          VkPhysicalDeviceRayTracingPipelineFeaturesKHR *features =
1617             (VkPhysicalDeviceRayTracingPipelineFeaturesKHR *)ext;
1618          features->rayTracingPipeline = true;
1619          features->rayTracingPipelineShaderGroupHandleCaptureReplay = false;
1620          features->rayTracingPipelineShaderGroupHandleCaptureReplayMixed = false;
1621          features->rayTracingPipelineTraceRaysIndirect = false;
1622          features->rayTraversalPrimitiveCulling = false;
1623          break;
1624       }
1625       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_FEATURES_KHR: {
1626          VkPhysicalDeviceMaintenance4FeaturesKHR *features =
1627             (VkPhysicalDeviceMaintenance4FeaturesKHR *)ext;
1628          features->maintenance4 = true;
1629          break;
1630       }
1631       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
1632          VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features =
1633             (VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *)ext;
1634          features->vertexInputDynamicState = true;
1635          break;
1636       }
1637       default:
1638          break;
1639       }
1640    }
1641 }
1642 
1643 static size_t
1644 radv_max_descriptor_set_size()
1645 {
1646    /* Make sure that the entire descriptor set is addressable with a signed
1647     * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
1648     * be at most 2 GiB. A combined image & sampler object counts as both a
1649     * sampler and a sampled image. This limit is for the pipeline layout,
1650     * not for the set layout, but there is no set limit, so we just set a
1651     * pipeline limit. No app is likely to hit this soon. */
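   /* For illustration: ignoring the comparatively small dynamic-buffer and
    * inline-uniform reservations, this is roughly 2 GiB / 224 bytes per
    * worst-case resource, i.e. about 9.6 million descriptors. */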
1652    return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1653            MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1654           (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1655            32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1656            32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
1657            64 /* storage image */);
1658 }
1659 
1660 static uint32_t
1661 radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
1662 {
1663    uint32_t uniform_offset_alignment =
1664       driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
1665    if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
1666       fprintf(stderr,
1667               "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
1668               "not a power of two\n",
1669               uniform_offset_alignment);
1670       uniform_offset_alignment = 0;
1671    }
1672 
1673    /* Take at least the hardware limit. */
1674    return MAX2(uniform_offset_alignment, 4);
1675 }
1676 
1677 void
1678 radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
1679                                  VkPhysicalDeviceProperties *pProperties)
1680 {
1681    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1682    VkSampleCountFlags sample_counts = 0xf;
1683 
1684    size_t max_descriptor_set_size = radv_max_descriptor_set_size();
1685 
1686    VkPhysicalDeviceLimits limits = {
1687       .maxImageDimension1D = (1 << 14),
1688       .maxImageDimension2D = (1 << 14),
1689       .maxImageDimension3D = (1 << 11),
1690       .maxImageDimensionCube = (1 << 14),
1691       .maxImageArrayLayers = (1 << 11),
1692       .maxTexelBufferElements = UINT32_MAX,
1693       .maxUniformBufferRange = UINT32_MAX,
1694       .maxStorageBufferRange = UINT32_MAX,
1695       .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
1696       .maxMemoryAllocationCount = UINT32_MAX,
1697       .maxSamplerAllocationCount = 64 * 1024,
1698       .bufferImageGranularity = 1,
1699       .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
1700       .maxBoundDescriptorSets = MAX_SETS,
1701       .maxPerStageDescriptorSamplers = max_descriptor_set_size,
1702       .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
1703       .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
1704       .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
1705       .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
1706       .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
1707       .maxPerStageResources = max_descriptor_set_size,
1708       .maxDescriptorSetSamplers = max_descriptor_set_size,
1709       .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
1710       .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
1711       .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
1712       .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
1713       .maxDescriptorSetSampledImages = max_descriptor_set_size,
1714       .maxDescriptorSetStorageImages = max_descriptor_set_size,
1715       .maxDescriptorSetInputAttachments = max_descriptor_set_size,
1716       .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
1717       .maxVertexInputBindings = MAX_VBS,
1718       .maxVertexInputAttributeOffset = UINT32_MAX,
1719       .maxVertexInputBindingStride = 2048,
1720       .maxVertexOutputComponents = 128,
1721       .maxTessellationGenerationLevel = 64,
1722       .maxTessellationPatchSize = 32,
1723       .maxTessellationControlPerVertexInputComponents = 128,
1724       .maxTessellationControlPerVertexOutputComponents = 128,
1725       .maxTessellationControlPerPatchOutputComponents = 120,
1726       .maxTessellationControlTotalOutputComponents = 4096,
1727       .maxTessellationEvaluationInputComponents = 128,
1728       .maxTessellationEvaluationOutputComponents = 128,
1729       .maxGeometryShaderInvocations = 127,
1730       .maxGeometryInputComponents = 64,
1731       .maxGeometryOutputComponents = 128,
1732       .maxGeometryOutputVertices = 256,
1733       .maxGeometryTotalOutputComponents = 1024,
1734       .maxFragmentInputComponents = 128,
1735       .maxFragmentOutputAttachments = 8,
1736       .maxFragmentDualSrcAttachments = 1,
1737       .maxFragmentCombinedOutputResources = 8,
1738       .maxComputeSharedMemorySize = pdevice->rad_info.chip_class >= GFX7 ? 65536 : 32768,
1739       .maxComputeWorkGroupCount = {65535, 65535, 65535},
1740       .maxComputeWorkGroupInvocations = 1024,
1741       .maxComputeWorkGroupSize = {1024, 1024, 1024},
1742       .subPixelPrecisionBits = 8,
1743       .subTexelPrecisionBits = 8,
1744       .mipmapPrecisionBits = 8,
1745       .maxDrawIndexedIndexValue = UINT32_MAX,
1746       .maxDrawIndirectCount = UINT32_MAX,
1747       .maxSamplerLodBias = 16,
1748       .maxSamplerAnisotropy = 16,
1749       .maxViewports = MAX_VIEWPORTS,
1750       .maxViewportDimensions = {(1 << 14), (1 << 14)},
1751       .viewportBoundsRange = {INT16_MIN, INT16_MAX},
1752       .viewportSubPixelBits = 8,
1753       .minMemoryMapAlignment = 4096, /* A page */
1754       .minTexelBufferOffsetAlignment = 4,
1755       .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
1756       .minStorageBufferOffsetAlignment = 4,
1757       .minTexelOffset = -32,
1758       .maxTexelOffset = 31,
1759       .minTexelGatherOffset = -32,
1760       .maxTexelGatherOffset = 31,
1761       .minInterpolationOffset = -2,
1762       .maxInterpolationOffset = 2,
1763       .subPixelInterpolationOffsetBits = 8,
1764       .maxFramebufferWidth = (1 << 14),
1765       .maxFramebufferHeight = (1 << 14),
1766       .maxFramebufferLayers = (1 << 10),
1767       .framebufferColorSampleCounts = sample_counts,
1768       .framebufferDepthSampleCounts = sample_counts,
1769       .framebufferStencilSampleCounts = sample_counts,
1770       .framebufferNoAttachmentsSampleCounts = sample_counts,
1771       .maxColorAttachments = MAX_RTS,
1772       .sampledImageColorSampleCounts = sample_counts,
1773       .sampledImageIntegerSampleCounts = sample_counts,
1774       .sampledImageDepthSampleCounts = sample_counts,
1775       .sampledImageStencilSampleCounts = sample_counts,
1776       .storageImageSampleCounts = sample_counts,
1777       .maxSampleMaskWords = 1,
1778       .timestampComputeAndGraphics = true,
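      /* clock_crystal_freq is in kHz, so 1e6 / kHz gives the timestamp
       * period in nanoseconds per tick. */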
1779       .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
1780       .maxClipDistances = 8,
1781       .maxCullDistances = 8,
1782       .maxCombinedClipAndCullDistances = 8,
1783       .discreteQueuePriorities = 2,
1784       .pointSizeRange = {0.0, 8191.875},
1785       .lineWidthRange = {0.0, 8191.875},
1786       .pointSizeGranularity = (1.0 / 8.0),
1787       .lineWidthGranularity = (1.0 / 8.0),
1788       .strictLines = false, /* FINISHME */
1789       .standardSampleLocations = true,
1790       .optimalBufferCopyOffsetAlignment = 128,
1791       .optimalBufferCopyRowPitchAlignment = 128,
1792       .nonCoherentAtomSize = 64,
1793    };
1794 
1795    VkPhysicalDeviceType device_type;
1796 
1797    if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {
1798       device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
1799    } else {
1800       device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1801    }
1802 
1803    *pProperties = (VkPhysicalDeviceProperties){
1804       .apiVersion = RADV_API_VERSION,
1805       .driverVersion = vk_get_driver_version(),
1806       .vendorID = ATI_VENDOR_ID,
1807       .deviceID = pdevice->rad_info.pci_id,
1808       .deviceType = device_type,
1809       .limits = limits,
1810       .sparseProperties =
1811          {
1812             .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
1813             .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
1814          },
1815    };
1816 
1817    strcpy(pProperties->deviceName, pdevice->name);
1818    memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1819 }
1820 
1821 static void
1822 radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
1823                                         VkPhysicalDeviceVulkan11Properties *p)
1824 {
1825    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);
1826 
1827    memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
1828    memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
1829    memset(p->deviceLUID, 0, VK_LUID_SIZE);
1830    /* The LUID is for Windows. */
1831    p->deviceLUIDValid = false;
1832    p->deviceNodeMask = 0;
1833 
1834    p->subgroupSize = RADV_SUBGROUP_SIZE;
1835    p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
1836    p->subgroupSupportedOperations =
1837       VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
1838       VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
1839       VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
1840       VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
1841    p->subgroupQuadOperationsInAllStages = true;
1842 
1843    p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
1844    p->maxMultiviewViewCount = MAX_VIEWS;
1845    p->maxMultiviewInstanceIndex = INT_MAX;
1846    p->protectedNoFault = false;
1847    p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
1848    p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
1849 }
1850 
1851 static void
1852 radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
1853                                         VkPhysicalDeviceVulkan12Properties *p)
1854 {
1855    assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);
1856 
1857    p->driverID = VK_DRIVER_ID_MESA_RADV;
1858    snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
1859    snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
1860             radv_get_compiler_string(pdevice));
1861    p->conformanceVersion = (VkConformanceVersion){
1862       .major = 1,
1863       .minor = 2,
1864       .subminor = 3,
1865       .patch = 0,
1866    };
1867 
1868    /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
1869     * controlled by the same config register.
1870     */
1871    if (pdevice->rad_info.has_packed_math_16bit) {
1872       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1873       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
1874    } else {
1875       p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1876       p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
1877    }
1878 
1879    /* With LLVM, do not allow both preserving and flushing denorms,
1880     * because different shaders in the same pipeline can have different
1881     * settings and this won't work for merged shaders. Making this work
1882     * would require LLVM support for changing the register. The same
1883     * logic applies to the rounding modes, because they are configured
1884     * with the same config register.
1885     */
1886    p->shaderDenormFlushToZeroFloat32 = true;
1887    p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
1888    p->shaderRoundingModeRTEFloat32 = true;
1889    p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
1890    p->shaderSignedZeroInfNanPreserveFloat32 = true;
1891 
1892    p->shaderDenormFlushToZeroFloat16 =
1893       pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1894    p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1895    p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
1896    p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
1897    p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
1898 
1899    p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1900    p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1901    p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
1902    p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
1903    p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
1904 
1905    p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
1906    p->shaderUniformBufferArrayNonUniformIndexingNative = false;
1907    p->shaderSampledImageArrayNonUniformIndexingNative = false;
1908    p->shaderStorageBufferArrayNonUniformIndexingNative = false;
1909    p->shaderStorageImageArrayNonUniformIndexingNative = false;
1910    p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
1911    p->robustBufferAccessUpdateAfterBind = true;
1912    p->quadDivergentImplicitLod = false;
1913 
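   /* This mirrors radv_max_descriptor_set_size() above; keep the two
    * computations in sync. */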
1914    size_t max_descriptor_set_size =
1915       ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
1916        MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
1917       (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
1918        32 /* storage buffer, 32 due to potential space wasted on alignment */ +
1919        32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
1920        64 /* storage image */);
1921    p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
1922    p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1923    p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1924    p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
1925    p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
1926    p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
1927    p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
1928    p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
1929    p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
1930    p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
1931    p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
1932    p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
1933    p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
1934    p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
1935    p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;
1936 
1937    /* We support all of the depth resolve modes */
1938    p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1939                                    VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |
1940                                    VK_RESOLVE_MODE_MAX_BIT_KHR;
1941 
1942    /* Average doesn't make sense for stencil, so we don't support that. */
1943    p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
1944                                      VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;
1945 
1946    p->independentResolveNone = true;
1947    p->independentResolve = true;
1948 
1949    /* GFX6-8 only support single channel min/max filter. */
1950    p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
1951    p->filterMinmaxSingleComponentFormats = true;
1952 
1953    p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
1954 
1955    p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1956 }
1957 
1958 void
1959 radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
1960                                   VkPhysicalDeviceProperties2 *pProperties)
1961 {
1962    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
1963    radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);
1964 
1965    VkPhysicalDeviceVulkan11Properties core_1_1 = {
1966       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
1967    };
1968    radv_get_physical_device_properties_1_1(pdevice, &core_1_1);
1969 
1970    VkPhysicalDeviceVulkan12Properties core_1_2 = {
1971       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
1972    };
1973    radv_get_physical_device_properties_1_2(pdevice, &core_1_2);
1974 
1975    vk_foreach_struct(ext, pProperties->pNext)
1976    {
1977       if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
1978          continue;
1979       if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
1980          continue;
1981 
1982       switch (ext->sType) {
1983       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
1984          VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
1985             (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
1986          properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1987          break;
1988       }
1989       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
1990          VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
1991             (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
1992          properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
1993          break;
1994       }
1995       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
1996          VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
1997             (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
1998          properties->minImportedHostPointerAlignment = 4096;
1999          break;
2000       }
2001       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
2002          VkPhysicalDeviceShaderCorePropertiesAMD *properties =
2003             (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;
2004 
2005          /* Shader engines. */
2006          properties->shaderEngineCount = pdevice->rad_info.max_se;
2007          properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
2008          properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
2009          properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
2010          properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
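         /* RADV reports the wave64 width here, even on chips that can also
          * run wave32. */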
2011          properties->wavefrontSize = 64;
2012 
2013          /* SGPR. */
2014          properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
2015          properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
2016          properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
2017          properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;
2018 
2019          /* VGPR. */
2020          properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
2021          properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
2022          properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
2023          properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
2024          break;
2025       }
2026       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
2027          VkPhysicalDeviceShaderCoreProperties2AMD *properties =
2028             (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;
2029 
2030          properties->shaderCoreFeatures = 0;
2031          properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
2032          break;
2033       }
2034       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
2035          VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
2036             (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
2037          properties->maxVertexAttribDivisor = UINT32_MAX;
2038          break;
2039       }
2040       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
2041          VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
2042             (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
2043          properties->primitiveOverestimationSize = 0;
2044          properties->maxExtraPrimitiveOverestimationSize = 0;
2045          properties->extraPrimitiveOverestimationSizeGranularity = 0;
2046          properties->primitiveUnderestimation = false;
2047          properties->conservativePointAndLineRasterization = false;
2048          properties->degenerateTrianglesRasterized = true;
2049          properties->degenerateLinesRasterized = false;
2050          properties->fullyCoveredFragmentShaderInputVariable = false;
2051          properties->conservativeRasterizationPostDepthCoverage = false;
2052          break;
2053       }
2054 #ifndef _WIN32
2055       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
2056          VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
2057             (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
2058          properties->pciDomain = pdevice->bus_info.domain;
2059          properties->pciBus = pdevice->bus_info.bus;
2060          properties->pciDevice = pdevice->bus_info.dev;
2061          properties->pciFunction = pdevice->bus_info.func;
2062          break;
2063       }
2064 #endif
2065       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
2066          VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
2067             (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
2068          properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
2069          properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
2070          properties->maxTransformFeedbackBufferSize = UINT32_MAX;
2071          properties->maxTransformFeedbackStreamDataSize = 512;
2072          properties->maxTransformFeedbackBufferDataSize = 512;
2073          properties->maxTransformFeedbackBufferDataStride = 512;
2074          properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
2075          properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
2076          properties->transformFeedbackRasterizationStreamSelect = false;
2077          properties->transformFeedbackDraw = true;
2078          break;
2079       }
2080       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
2081          VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
2082             (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;
2083 
2084          props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
2085          props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2086          props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
2087             MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
2088          props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2089          props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
2090          break;
2091       }
2092       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
2093          VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
2094             (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
2095          properties->sampleLocationSampleCounts = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
2096                                                   VK_SAMPLE_COUNT_8_BIT;
2097          properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
2098          properties->sampleLocationCoordinateRange[0] = 0.0f;
2099          properties->sampleLocationCoordinateRange[1] = 0.9375f;
2100          properties->sampleLocationSubPixelBits = 4;
2101          properties->variableSampleLocations = false;
2102          break;
2103       }
2104       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
2105          VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
2106             (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
2107          properties->storageTexelBufferOffsetAlignmentBytes = 4;
2108          properties->storageTexelBufferOffsetSingleTexelAlignment = true;
2109          properties->uniformTexelBufferOffsetAlignmentBytes = 4;
2110          properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
2111          break;
2112       }
2113       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
2114          VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
2115             (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
2116          props->minSubgroupSize = 64;
2117          props->maxSubgroupSize = 64;
2118          props->maxComputeWorkgroupSubgroups = UINT32_MAX;
2119          props->requiredSubgroupSizeStages = 0;
2120 
2121          if (pdevice->rad_info.chip_class >= GFX10) {
2122             /* Only GFX10+ supports wave32. */
2123             props->minSubgroupSize = 32;
2124             props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
2125          }
2126          break;
2127       }
2128       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
2129          VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
2130             (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
2131          props->lineSubPixelPrecisionBits = 4;
2132          break;
2133       }
2134       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
2135          VkPhysicalDeviceRobustness2PropertiesEXT *properties =
2136             (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
2137          properties->robustStorageBufferAccessSizeAlignment = 4;
2138          properties->robustUniformBufferAccessSizeAlignment = 4;
2139          break;
2140       }
2141       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
2142          VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
2143             (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
2144          props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
2145          break;
2146       }
2147       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
2148          VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
2149             (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
2150          props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2151          props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
2152          props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
2153          props->primitiveFragmentShadingRateWithMultipleViewports = true;
2154          props->layeredShadingRateAttachments = false; /* TODO */
2155          props->fragmentShadingRateNonTrivialCombinerOps = true;
2156          props->maxFragmentSize = (VkExtent2D){2, 2};
2157          props->maxFragmentSizeAspectRatio = 2;
2158          props->maxFragmentShadingRateCoverageSamples = 32;
2159          props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
2160          props->fragmentShadingRateWithShaderDepthStencilWrites = false;
2161          props->fragmentShadingRateWithSampleMask = true;
2162          props->fragmentShadingRateWithShaderSampleMask = false;
2163          props->fragmentShadingRateWithConservativeRasterization = true;
2164          props->fragmentShadingRateWithFragmentShaderInterlock = false;
2165          props->fragmentShadingRateWithCustomSampleLocations = false;
2166          props->fragmentShadingRateStrictMultiplyCombiner = true;
2167          break;
2168       }
2169       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
2170          VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
2171             (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
2172          props->provokingVertexModePerPipeline = true;
2173          props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
2174          break;
2175       }
2176       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
2177          VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
2178             (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
2179          props->maxGeometryCount = (1 << 24) - 1;
2180          props->maxInstanceCount = (1 << 24) - 1;
2181          props->maxPrimitiveCount = (1 << 29) - 1;
2182          props->maxPerStageDescriptorAccelerationStructures =
2183             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2184          props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
2185             pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
2186          props->maxDescriptorSetAccelerationStructures =
2187             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2188          props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
2189             pProperties->properties.limits.maxDescriptorSetStorageBuffers;
2190          props->minAccelerationStructureScratchOffsetAlignment = 128;
2191          break;
2192       }
2193 #ifndef _WIN32
2194       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
2195          VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
2196          if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
2197             props->hasPrimary = true;
2198             props->primaryMajor = (int64_t)major(pdevice->primary_devid);
2199             props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
2200          } else {
2201             props->hasPrimary = false;
2202          }
2203          if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
2204             props->hasRender = true;
2205             props->renderMajor = (int64_t)major(pdevice->render_devid);
2206             props->renderMinor = (int64_t)minor(pdevice->render_devid);
2207          } else {
2208             props->hasRender = false;
2209          }
2210          break;
2211       }
2212 #endif
2213       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
2214          VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
2215          props->maxMultiDrawCount = 2048;
2216          break;
2217       }
2218       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_INTEGER_DOT_PRODUCT_PROPERTIES_KHR: {
2219          VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *props =
2220             (VkPhysicalDeviceShaderIntegerDotProductPropertiesKHR *)ext;
2221 
2222          bool accel = pdevice->rad_info.has_accelerated_dot_product;
2223 
2224          props->integerDotProduct8BitUnsignedAccelerated = accel;
2225          props->integerDotProduct8BitSignedAccelerated = accel;
2226          props->integerDotProduct8BitMixedSignednessAccelerated = false;
2227          props->integerDotProduct4x8BitPackedUnsignedAccelerated = accel;
2228          props->integerDotProduct4x8BitPackedSignedAccelerated = accel;
2229          props->integerDotProduct4x8BitPackedMixedSignednessAccelerated = false;
2230          props->integerDotProduct16BitUnsignedAccelerated = accel;
2231          props->integerDotProduct16BitSignedAccelerated = accel;
2232          props->integerDotProduct16BitMixedSignednessAccelerated = false;
2233          props->integerDotProduct32BitUnsignedAccelerated = false;
2234          props->integerDotProduct32BitSignedAccelerated = false;
2235          props->integerDotProduct32BitMixedSignednessAccelerated = false;
2236          props->integerDotProduct64BitUnsignedAccelerated = false;
2237          props->integerDotProduct64BitSignedAccelerated = false;
2238          props->integerDotProduct64BitMixedSignednessAccelerated = false;
2239          props->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = accel;
2240          props->integerDotProductAccumulatingSaturating8BitSignedAccelerated = accel;
2241          props->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
2242          props->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = accel;
2243          props->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = accel;
2244          props->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
2245             false;
2246          props->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = accel;
2247          props->integerDotProductAccumulatingSaturating16BitSignedAccelerated = accel;
2248          props->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
2249          props->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
2250          props->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
2251          props->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
2252          props->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
2253          props->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
2254          props->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
2255          break;
2256       }
2257       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR: {
2258          VkPhysicalDeviceRayTracingPipelinePropertiesKHR *props =
2259             (VkPhysicalDeviceRayTracingPipelinePropertiesKHR *)ext;
2260          props->shaderGroupHandleSize = RADV_RT_HANDLE_SIZE;
2261          props->maxRayRecursionDepth = 31;    /* Minimum allowed for DXR. */
2262          props->maxShaderGroupStride = 16384; /* dummy */
2263          props->shaderGroupBaseAlignment = 16;
2264          props->shaderGroupHandleCaptureReplaySize = 16;
2265          props->maxRayDispatchInvocationCount = 1024 * 1024 * 64;
2266          props->shaderGroupHandleAlignment = 16;
2267          props->maxRayHitAttributeSize = RADV_MAX_HIT_ATTRIB_SIZE;
2268          break;
2269       }
2270       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_4_PROPERTIES_KHR: {
2271          VkPhysicalDeviceMaintenance4PropertiesKHR *properties =
2272             (VkPhysicalDeviceMaintenance4PropertiesKHR *)ext;
2273          properties->maxBufferSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
2274          break;
2275       }
2276       default:
2277          break;
2278       }
2279    }
2280 }
2281 
2282 static void
2283 radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
2284                                                  uint32_t *pCount,
2285                                                  VkQueueFamilyProperties **pQueueFamilyProperties)
2286 {
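   /* Standard Vulkan two-call idiom: with pQueueFamilyProperties == NULL we
    * only report how many queue families exist; otherwise we fill in at most
    * *pCount entries and write back the number actually written. */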
2287    int num_queue_families = 1;
2288    int idx;
2289    if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
2290        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
2291       num_queue_families++;
2292 
2293    if (pQueueFamilyProperties == NULL) {
2294       *pCount = num_queue_families;
2295       return;
2296    }
2297 
2298    if (!*pCount)
2299       return;
2300 
2301    idx = 0;
2302    if (*pCount >= 1) {
2303       *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2304          .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
2305                        VK_QUEUE_SPARSE_BINDING_BIT,
2306          .queueCount = 1,
2307          .timestampValidBits = 64,
2308          .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2309       };
2310       idx++;
2311    }
2312 
2313    if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
2314        !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
2315       if (*pCount > idx) {
2316          *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
2317             .queueFlags =
2318                VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
2319             .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
2320             .timestampValidBits = 64,
2321             .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
2322          };
2323          idx++;
2324       }
2325    }
2326    *pCount = idx;
2327 }
2328 
2329 void
2330 radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2331                                             VkQueueFamilyProperties *pQueueFamilyProperties)
2332 {
2333    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2334    if (!pQueueFamilyProperties) {
2335       radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2336       return;
2337    }
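   /* One pointer per possible family; the helper reports at most a graphics
    * family and a compute family today, so three slots are plenty. */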
2338    VkQueueFamilyProperties *properties[] = {
2339       pQueueFamilyProperties + 0,
2340       pQueueFamilyProperties + 1,
2341       pQueueFamilyProperties + 2,
2342    };
2343    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2344    assert(*pCount <= 3);
2345 }
2346 
2347 static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {
2348    VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
2349    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
2350    VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
2351    VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
2352 };
2353 
2354 void
2355 radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
2356                                              VkQueueFamilyProperties2 *pQueueFamilyProperties)
2357 {
2358    RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
2359    if (!pQueueFamilyProperties) {
2360       radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
2361       return;
2362    }
2363    VkQueueFamilyProperties *properties[] = {
2364       &pQueueFamilyProperties[0].queueFamilyProperties,
2365       &pQueueFamilyProperties[1].queueFamilyProperties,
2366       &pQueueFamilyProperties[2].queueFamilyProperties,
2367    };
2368    radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
2369    assert(*pCount <= 3);
2370 
2371    for (uint32_t i = 0; i < *pCount; i++) {
2372       vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
2373       {
2374          switch (ext->sType) {
2375          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
2376             VkQueueFamilyGlobalPriorityPropertiesEXT *prop =
2377                (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
2378             STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);
2379             prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
2380             memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
2381             break;
2382          }
2383          default:
2384             break;
2385          }
2386       }
2387    }
2388 }
2389 
2390 void
2391 radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
2392                                        VkPhysicalDeviceMemoryProperties *pMemoryProperties)
2393 {
2394    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2395 
2396    *pMemoryProperties = physical_device->memory_properties;
2397 }
2398 
2399 static void
2400 radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
2401                                   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
2402 {
2403    RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
2404    VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;
2405 
2406    /* For all memory heaps, the budget is computed as follows:
2407     *	heap_budget = heap_size - global_heap_usage + app_heap_usage
2408     *
2409     * The Vulkan spec 1.1.97 says that the budget should include any
2410     * currently allocated device memory.
2411     *
2412     * Note that the application heap usages are not really accurate (e.g.
2413     * in the presence of shared buffers).
2414     */
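   /* Worked example with hypothetical numbers: for heap_size = 8 GiB and
    * global_heap_usage = 3 GiB, of which app_heap_usage = 1 GiB belongs to
    * this process, the budget is 8 - 3 + 1 = 6 GiB, i.e. the process can
    * keep its current 1 GiB resident and allocate up to 5 GiB more.
    */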
2415    if (!device->rad_info.has_dedicated_vram) {
2416       /* On APUs, the driver exposes fake heaps to the application because the carveout is
2417        * usually too small for games; the budgets then need to be redistributed accordingly.
2418        */
2419 
2420       assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
2421       assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
2422       assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
2423       uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;
2424 
2425       /* Get the visible VRAM/GTT heap sizes and internal usages. */
2426       uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
2427       uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;
2428 
2429       uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
2430                                          device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2431       uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2432 
2433       /* Compute the total heap size, internal and system usage. */
2434       uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
2435       uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
2436       uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
2437                                     device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2438 
2439       uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);
2440 
2441       /* Compute the total free space that can be allocated for this process across all heaps. */
2442       uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);
2443 
2444       /* Compute the remaining visible VRAM size for this process. */
2445       uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);
2446 
2447       /* Distribute the total free space (2/3 as VRAM and 1/3 as GTT) to match the heap sizes,
2448        * and align down to the page size to be conservative.
2449        */
2450       vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
2451                                           device->rad_info.gart_page_size);
2452       uint64_t gtt_free_space = total_free_space - vram_vis_free_space;
2453 
2454       memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
2455       memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
2456       memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
2457       memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
2458    } else {
2459       unsigned mask = device->heaps;
2460       unsigned heap = 0;
2461       while (mask) {
2462          uint64_t internal_usage = 0, system_usage = 0;
2463          unsigned type = 1u << u_bit_scan(&mask);
2464 
2465          switch (type) {
2466          case RADV_HEAP_VRAM:
2467             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2468             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
2469             break;
2470          case RADV_HEAP_VRAM_VIS:
2471             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
2472             if (!(device->heaps & RADV_HEAP_VRAM))
2473                internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
2474             system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
2475             break;
2476          case RADV_HEAP_GTT:
2477             internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
2478             system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
2479             break;
2480          }
2481 
2482          uint64_t total_usage = MAX2(internal_usage, system_usage);
2483 
2484          uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
2485                                MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
2486          memoryBudget->heapBudget[heap] = free_space + internal_usage;
2487          memoryBudget->heapUsage[heap] = internal_usage;
2488          ++heap;
2489       }
2490 
2491       assert(heap == memory_properties->memoryHeapCount);
2492    }
2493 
2494    /* The heapBudget and heapUsage values must be zero for array elements
2495     * greater than or equal to
2496     * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
2497     */
2498    for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
2499       memoryBudget->heapBudget[i] = 0;
2500       memoryBudget->heapUsage[i] = 0;
2501    }
2502 }
2503 
2504 void
2505 radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
2506                                         VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
2507 {
2508    radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);
2509 
2510    VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
2511       vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
2512    if (memory_budget)
2513       radv_get_memory_budget_properties(physicalDevice, memory_budget);
2514 }
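/* Illustrative only, not part of the driver: applications reach the budget
 * path above by chaining VK_EXT_memory_budget into the properties query:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(pdev, &props);
 *    // budget.heapBudget[i] / budget.heapUsage[i] are then valid per heap.
 */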
2515 
2516 VkResult
2517 radv_GetMemoryHostPointerPropertiesEXT(
2518    VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
2519    VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
2520 {
2521    RADV_FROM_HANDLE(radv_device, device, _device);
2522 
2523    switch (handleType) {
2524    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
2525       const struct radv_physical_device *physical_device = device->physical_device;
2526       uint32_t memoryTypeBits = 0;
2527       for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
2528          if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
2529              !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
2530             memoryTypeBits = (1 << i);
2531             break;
2532          }
2533       }
2534       pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
2535       return VK_SUCCESS;
2536    }
2537    default:
2538       return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2539    }
2540 }
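/* Illustrative only, not part of the driver: with VK_EXT_external_memory_host
 * an application is expected to validate a host allocation before importing
 * it, roughly as follows (pfn fetched via vkGetDeviceProcAddr):
 *
 *    VkMemoryHostPointerPropertiesEXT props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_HOST_POINTER_PROPERTIES_EXT,
 *    };
 *    VkResult r = pfnGetMemoryHostPointerPropertiesEXT(
 *       dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT,
 *       host_ptr, &props);
 *    // On success, props.memoryTypeBits selects the cacheable GTT type
 *    // chosen above, usable with VkImportMemoryHostPointerInfoEXT.
 */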
2541 
2542 static enum radeon_ctx_priority
2543 radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
2544 {
2545    /* Default to MEDIUM when a specific global priority isn't requested */
2546    if (!pObj)
2547       return RADEON_CTX_PRIORITY_MEDIUM;
2548 
2549    switch (pObj->globalPriority) {
2550    case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
2551       return RADEON_CTX_PRIORITY_REALTIME;
2552    case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
2553       return RADEON_CTX_PRIORITY_HIGH;
2554    case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
2555       return RADEON_CTX_PRIORITY_MEDIUM;
2556    case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
2557       return RADEON_CTX_PRIORITY_LOW;
2558    default:
2559       unreachable("Illegal global priority value");
2560       return RADEON_CTX_PRIORITY_INVALID;
2561    }
2562 }
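/* Illustrative only, not part of the driver: the priority mapped above is
 * requested per queue at device creation via the pNext chain:
 *
 *    VkDeviceQueueGlobalPriorityCreateInfoEXT prio = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
 *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *       .pNext = &prio,
 *       // queueFamilyIndex/queueCount/pQueuePriorities as usual
 *    };
 */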
2563 
2564 static int
2565 radv_queue_init(struct radv_device *device, struct radv_queue *queue,
2566                 int idx, const VkDeviceQueueCreateInfo *create_info,
2567                 const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
2568 {
2569    queue->device = device;
2570    queue->priority = radv_get_queue_global_priority(global_priority);
2571    queue->hw_ctx = device->hw_ctx[queue->priority];
2572 
2573    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
2574    if (result != VK_SUCCESS)
2575       return result;
2576 
2577    list_inithead(&queue->pending_submissions);
2578    mtx_init(&queue->pending_mutex, mtx_plain);
2579 
2580    mtx_init(&queue->thread_mutex, mtx_plain);
2581    if (u_cnd_monotonic_init(&queue->thread_cond)) {
2582       vk_queue_finish(&queue->vk);
2583       return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
2584    }
2585    queue->cond_created = true;
2586 
2587    return VK_SUCCESS;
2588 }
2589 
2590 static void
2591 radv_queue_finish(struct radv_queue *queue)
2592 {
2593    if (queue->hw_ctx) {
2594       if (queue->cond_created) {
2595          if (queue->thread_running) {
2596             p_atomic_set(&queue->thread_exit, true);
2597             u_cnd_monotonic_broadcast(&queue->thread_cond);
2598             thrd_join(queue->submission_thread, NULL);
2599          }
2600 
2601          u_cnd_monotonic_destroy(&queue->thread_cond);
2602       }
2603 
2604       mtx_destroy(&queue->pending_mutex);
2605       mtx_destroy(&queue->thread_mutex);
2606    }
2607 
2608    if (queue->initial_full_flush_preamble_cs)
2609       queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
2610    if (queue->initial_preamble_cs)
2611       queue->device->ws->cs_destroy(queue->initial_preamble_cs);
2612    if (queue->continue_preamble_cs)
2613       queue->device->ws->cs_destroy(queue->continue_preamble_cs);
2614    if (queue->descriptor_bo)
2615       queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
2616    if (queue->scratch_bo)
2617       queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
2618    if (queue->esgs_ring_bo)
2619       queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
2620    if (queue->gsvs_ring_bo)
2621       queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
2622    if (queue->tess_rings_bo)
2623       queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
2624    if (queue->gds_bo)
2625       queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
2626    if (queue->gds_oa_bo)
2627       queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
2628    if (queue->compute_scratch_bo)
2629       queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
2630 
2631    vk_queue_finish(&queue->vk);
2632 }
2633 
2634 static void
2635 radv_device_init_gs_info(struct radv_device *device)
2636 {
2637    device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
2638                                                   device->physical_device->rad_info.family);
2639 }
2640 
2641 static VkResult
2642 radv_device_init_border_color(struct radv_device *device)
2643 {
2644    VkResult result;
2645 
2646    result = device->ws->buffer_create(
2647       device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
2648       RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
2649       RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);
2650 
2651    if (result != VK_SUCCESS)
2652       return vk_error(device, result);
2653 
2654    result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
2655    if (result != VK_SUCCESS)
2656       return vk_error(device, result);
2657 
2658    device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
2659    if (!device->border_color_data.colors_gpu_ptr)
2660       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2661    mtx_init(&device->border_color_data.mutex, mtx_plain);
2662 
2663    return VK_SUCCESS;
2664 }
2665 
2666 static void
2667 radv_device_finish_border_color(struct radv_device *device)
2668 {
2669    if (device->border_color_data.bo) {
2670       device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
2671       device->ws->buffer_destroy(device->ws, device->border_color_data.bo);
2672 
2673       mtx_destroy(&device->border_color_data.mutex);
2674    }
2675 }
2676 
2677 static VkResult
2678 radv_device_init_vs_prologs(struct radv_device *device)
2679 {
2680    u_rwlock_init(&device->vs_prologs_lock);
2681    device->vs_prologs = _mesa_hash_table_create(NULL, &radv_hash_vs_prolog, &radv_cmp_vs_prolog);
2682    if (!device->vs_prologs)
2683       return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2684 
2685    /* don't pre-compile prologs if we want to print them */
2686    if (device->instance->debug_flags & RADV_DEBUG_DUMP_PROLOGS)
2687       return VK_SUCCESS;
2688 
2689    struct radv_vs_input_state state;
2690    state.nontrivial_divisors = 0;
2691    memset(state.offsets, 0, sizeof(state.offsets));
2692    state.alpha_adjust_lo = 0;
2693    state.alpha_adjust_hi = 0;
2694    memset(state.formats, 0, sizeof(state.formats));
2695 
2696    struct radv_vs_prolog_key key;
2697    key.state = &state;
2698    key.misaligned_mask = 0;
2699    key.as_ls = false;
2700    key.is_ngg = device->physical_device->use_ngg;
2701    key.next_stage = MESA_SHADER_VERTEX;
2702    key.wave32 = device->physical_device->ge_wave_size == 32;
2703 
2704    for (unsigned i = 1; i <= MAX_VERTEX_ATTRIBS; i++) {
2705       state.attribute_mask = BITFIELD_MASK(i);
2706       state.instance_rate_inputs = 0;
2707 
2708       key.num_attributes = i;
2709 
2710       device->simple_vs_prologs[i - 1] = radv_create_vs_prolog(device, &key);
2711       if (!device->simple_vs_prologs[i - 1])
2712          return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2713    }
2714 
2715    unsigned idx = 0;
2716    for (unsigned num_attributes = 1; num_attributes <= 16; num_attributes++) {
2717       state.attribute_mask = BITFIELD_MASK(num_attributes);
2718 
2719       for (unsigned i = 0; i < num_attributes; i++)
2720          state.divisors[i] = 1;
2721 
2722       for (unsigned count = 1; count <= num_attributes; count++) {
2723          for (unsigned start = 0; start <= (num_attributes - count); start++) {
2724             state.instance_rate_inputs = u_bit_consecutive(start, count);
2725 
2726             key.num_attributes = num_attributes;
2727 
2728             struct radv_shader_prolog *prolog = radv_create_vs_prolog(device, &key);
2729             if (!prolog)
2730                return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2731 
2732             assert(idx ==
2733                    radv_instance_rate_prolog_index(num_attributes, state.instance_rate_inputs));
2734             device->instance_rate_vs_prologs[idx++] = prolog;
2735          }
2736       }
2737    }
2738    assert(idx == ARRAY_SIZE(device->instance_rate_vs_prologs));
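   /* Sanity note: the nested loops above generate one prolog per contiguous
    * instance-rate mask, i.e. the sum over n = 1..16 of n * (n + 1) / 2 = 816
    * entries, which is what instance_rate_vs_prologs is sized for.
    */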
2739 
2740    return VK_SUCCESS;
2741 }
2742 
2743 static void
2744 radv_device_finish_vs_prologs(struct radv_device *device)
2745 {
2746    if (device->vs_prologs) {
2747       hash_table_foreach(device->vs_prologs, entry)
2748       {
2749          free((void *)entry->key);
2750          radv_prolog_destroy(device, entry->data);
2751       }
2752       _mesa_hash_table_destroy(device->vs_prologs, NULL);
2753    }
2754 
2755    for (unsigned i = 0; i < ARRAY_SIZE(device->simple_vs_prologs); i++)
2756       radv_prolog_destroy(device, device->simple_vs_prologs[i]);
2757 
2758    for (unsigned i = 0; i < ARRAY_SIZE(device->instance_rate_vs_prologs); i++)
2759       radv_prolog_destroy(device, device->instance_rate_vs_prologs[i]);
2760 }
2761 
2762 VkResult
2763 radv_device_init_vrs_state(struct radv_device *device)
2764 {
2765    /* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this
2766     * dynamically at some point.
2767     */
2768    uint32_t width = 4096, height = 4096;
2769    VkDeviceMemory mem;
2770    VkBuffer buffer;
2771    VkResult result;
2772    VkImage image;
2773 
2774    VkImageCreateInfo image_create_info = {
2775       .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2776       .imageType = VK_IMAGE_TYPE_2D,
2777       .format = VK_FORMAT_D16_UNORM,
2778       .extent = {width, height, 1},
2779       .mipLevels = 1,
2780       .arrayLayers = 1,
2781       .samples = VK_SAMPLE_COUNT_1_BIT,
2782       .tiling = VK_IMAGE_TILING_OPTIMAL,
2783       .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
2784       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2785       .queueFamilyIndexCount = 0,
2786       .pQueueFamilyIndices = NULL,
2787       .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
2788    };
2789 
2790    result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,
2791                              &device->meta_state.alloc, &image);
2792    if (result != VK_SUCCESS)
2793       return result;
2794 
2795    VkBufferCreateInfo buffer_create_info = {
2796       .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
2797       .size = radv_image_from_handle(image)->planes[0].surface.meta_size,
2798       .usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,
2799       .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2800    };
2801 
2802    result = radv_CreateBuffer(radv_device_to_handle(device), &buffer_create_info,
2803                               &device->meta_state.alloc, &buffer);
2804    if (result != VK_SUCCESS)
2805       goto fail_create;
2806 
2807    VkBufferMemoryRequirementsInfo2 info = {
2808       .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
2809       .buffer = buffer,
2810    };
2811    VkMemoryRequirements2 mem_req = {
2812       .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
2813    };
2814    radv_GetBufferMemoryRequirements2(radv_device_to_handle(device), &info, &mem_req);
2815 
2816    VkMemoryAllocateInfo alloc_info = {
2817       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2818       .allocationSize = mem_req.memoryRequirements.size,
2819    };
2820 
2821    result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,
2822                                 &device->meta_state.alloc, &mem);
2823    if (result != VK_SUCCESS)
2824       goto fail_alloc;
2825 
2826    VkBindBufferMemoryInfo bind_info = {
2827       .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
2828       .buffer = buffer,
2829       .memory = mem,
2830       .memoryOffset = 0
2831    };
2832 
2833    result = radv_BindBufferMemory2(radv_device_to_handle(device), 1, &bind_info);
2834    if (result != VK_SUCCESS)
2835       goto fail_bind;
2836 
2837    device->vrs.image = radv_image_from_handle(image);
2838    device->vrs.buffer = radv_buffer_from_handle(buffer);
2839    device->vrs.mem = radv_device_memory_from_handle(mem);
2840 
2841    return VK_SUCCESS;
2842 
2843 fail_bind:
2844    radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
2845 fail_alloc:
2846    radv_DestroyBuffer(radv_device_to_handle(device), buffer, &device->meta_state.alloc);
2847 fail_create:
2848    radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);
2849 
2850    return result;
2851 }
2852 
2853 static void
2854 radv_device_finish_vrs_image(struct radv_device *device)
2855 {
2856    radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
2857                    &device->meta_state.alloc);
2858    radv_DestroyBuffer(radv_device_to_handle(device), radv_buffer_to_handle(device->vrs.buffer),
2859                      &device->meta_state.alloc);
2860    radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
2861                      &device->meta_state.alloc);
2862 }
2863 
2864 VkResult
2865 _radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)
2866 {
2867    VkResult err;
2868    va_list ap;
2869 
2870    p_atomic_inc(&device->lost);
2871 
2872    va_start(ap, msg);
2873    err =
2874       __vk_errorv(device, VK_ERROR_DEVICE_LOST, file, line, msg, ap);
2875    va_end(ap);
2876 
2877    return err;
2878 }
2879 
2880 VkResult
2881 radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
2882                   const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
2883 {
2884    RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
2885    VkResult result;
2886    struct radv_device *device;
2887 
2888    bool keep_shader_info = false;
2889    bool robust_buffer_access = false;
2890    bool robust_buffer_access2 = false;
2891    bool overallocation_disallowed = false;
2892    bool custom_border_colors = false;
2893    bool attachment_vrs_enabled = false;
2894    bool image_float32_atomics = false;
2895    bool vs_prologs = false;
2896 
2897    /* Check enabled features */
2898    if (pCreateInfo->pEnabledFeatures) {
2899       if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
2900          robust_buffer_access = true;
2901    }
2902 
2903    vk_foreach_struct_const(ext, pCreateInfo->pNext)
2904    {
2905       switch (ext->sType) {
2906       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
2907          const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
2908          if (features->features.robustBufferAccess)
2909             robust_buffer_access = true;
2910          break;
2911       }
2912       case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
2913          const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
2914          if (overallocation->overallocationBehavior ==
2915              VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
2916             overallocation_disallowed = true;
2917          break;
2918       }
2919       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2920          const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
2921             (const void *)ext;
2922          custom_border_colors = border_color_features->customBorderColors;
2923          break;
2924       }
2925       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
2926          const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
2927          attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;
2928          break;
2929       }
2930       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
2931          const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
2932          if (features->robustBufferAccess2)
2933             robust_buffer_access2 = true;
2934          break;
2935       }
2936       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
2937          const VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features = (const void *)ext;
2938          if (features->shaderImageFloat32Atomics ||
2939              features->sparseImageFloat32Atomics)
2940             image_float32_atomics = true;
2941          break;
2942       }
2943       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_2_FEATURES_EXT: {
2944          const VkPhysicalDeviceShaderAtomicFloat2FeaturesEXT *features = (const void *)ext;
2945          if (features->shaderImageFloat32AtomicMinMax ||
2946              features->sparseImageFloat32AtomicMinMax)
2947             image_float32_atomics = true;
2948          break;
2949       }
2950       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_INPUT_DYNAMIC_STATE_FEATURES_EXT: {
2951          const VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT *features = (const void *)ext;
2952          if (features->vertexInputDynamicState)
2953             vs_prologs = true;
2954          break;
2955       }
2956       default:
2957          break;
2958       }
2959    }
2960 
2961    device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2962                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2963    if (!device)
2964       return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2965 
2966    struct vk_device_dispatch_table dispatch_table;
2967 
2968    if (physical_device->instance->vk.app_info.app_name &&
2969        !strcmp(physical_device->instance->vk.app_info.app_name, "metroexodus")) {
2970       /* Metro Exodus (Linux native) calls vkGetSemaphoreCounterValue() with a NULL semaphore and it
2971        * crashes sometimes. Work around this game bug by enabling an internal layer. Remove this
2972        * when the game is fixed.
2973        */
2974       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &metro_exodus_device_entrypoints, true);
2975       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
2976    } else if (radv_thread_trace_enabled()) {
2977       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
2978       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
2979    } else {
2980       vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
2981    }
2982    vk_device_dispatch_table_from_entrypoints(&dispatch_table, &wsi_device_entrypoints, false);
2983 
2984    result =
2985       vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
2986    if (result != VK_SUCCESS) {
2987       vk_free(&device->vk.alloc, device);
2988       return result;
2989    }
2990 
2991    device->instance = physical_device->instance;
2992    device->physical_device = physical_device;
2993 
2994    device->ws = physical_device->ws;
2995 
2996    keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;
2997 
2998    /* With update-after-bind we can't attach BOs to the command buffer
2999     * from the descriptor set anymore, so we have to use a global BO list.
3000     */
3001    device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
3002                                 device->vk.enabled_extensions.EXT_descriptor_indexing ||
3003                                 device->vk.enabled_extensions.EXT_buffer_device_address ||
3004                                 device->vk.enabled_extensions.KHR_buffer_device_address ||
3005                                 device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
3006                                 device->vk.enabled_extensions.KHR_acceleration_structure;
3007 
3008    device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
3009    device->robust_buffer_access2 = robust_buffer_access2;
3010 
3011    device->attachment_vrs_enabled = attachment_vrs_enabled;
3012 
3013    device->image_float32_atomics = image_float32_atomics;
3014 
3015    radv_init_shader_arenas(device);
3016 
3017    device->overallocation_disallowed = overallocation_disallowed;
3018    mtx_init(&device->overallocation_mutex, mtx_plain);
3019 
3020    /* Create one context per queue priority. */
3021    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3022       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3023       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3024          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3025       enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);
3026 
3027       if (device->hw_ctx[priority])
3028          continue;
3029 
3030       result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
3031       if (result != VK_SUCCESS)
3032          goto fail;
3033    }
3034 
3035    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
3036       const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
3037       uint32_t qfi = queue_create->queueFamilyIndex;
3038       const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
3039          vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
3040 
3041       device->queues[qfi] =
3042          vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
3043                   VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
3044       if (!device->queues[qfi]) {
3045          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3046          goto fail;
3047       }
3048 
3049       memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));
3050 
3051       device->queue_count[qfi] = queue_create->queueCount;
3052 
3053       for (unsigned q = 0; q < queue_create->queueCount; q++) {
3054          result = radv_queue_init(device, &device->queues[qfi][q], q, queue_create, global_priority);
3055          if (result != VK_SUCCESS)
3056             goto fail;
3057       }
3058    }
3059 
3060    device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
3061                          !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);
3062 
3063    /* The maximum number of scratch waves. Scratch space isn't divided
3064     * evenly between CUs. The number is only a function of the number of CUs.
3065     * We can decrease the constant to decrease the scratch buffer size.
3066     *
3067     * device->scratch_waves must be >= the maximum possible size of
3068     * 1 threadgroup, so that the hw doesn't hang from being unable
3069     * to start any.
3070     *
3071     * The recommended value is 4 per CU at most. Higher numbers don't
3072     * bring much benefit, but they still occupy chip resources (think
3073     * async compute). I've seen ~2% performance difference between 4 and 32.
3074     */
3075    uint32_t max_threads_per_block = 2048;
3076    device->scratch_waves =
3077       MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);
3078 
3079    device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);
3080 
3081    if (device->physical_device->rad_info.chip_class >= GFX7) {
3082       /* If the KMD allows it (there is a KMD hw register for it),
3083        * allow launching waves out-of-order.
3084        */
3085       device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
3086    }
3087 
3088    radv_device_init_gs_info(device);
3089 
3090    device->tess_offchip_block_dw_size =
3091       device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;
3092 
3093    if (getenv("RADV_TRACE_FILE")) {
3094       fprintf(
3095          stderr,
3096          "***********************************************************************************\n");
3097       fprintf(
3098          stderr,
3099          "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
3100       fprintf(
3101          stderr,
3102          "***********************************************************************************\n");
3103       abort();
3104    }
3105 
3106    if (device->instance->debug_flags & RADV_DEBUG_HANG) {
3107       /* Enable GPU hangs detection and dump logs if a GPU hang is
3108        * detected.
3109        */
3110       keep_shader_info = true;
3111 
3112       if (!radv_init_trace(device))
3113          goto fail;
3114 
3115       fprintf(stderr,
3116               "*****************************************************************************\n");
3117       fprintf(stderr,
3118               "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
3119       fprintf(stderr,
3120               "*****************************************************************************\n");
3121 
3122       /* Wait for idle after every draw/dispatch to identify the
3123        * first bad call.
3124        */
3125       device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;
3126 
3127       radv_dump_enabled_options(device, stderr);
3128    }
3129 
3130    if (radv_thread_trace_enabled()) {
3131       fprintf(stderr, "*************************************************\n");
3132       fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
3133       fprintf(stderr, "*************************************************\n");
3134 
3135       if (device->physical_device->rad_info.chip_class < GFX8 ||
3136           device->physical_device->rad_info.chip_class > GFX10_3) {
3137          fprintf(stderr, "GPU hardware not supported: refer to "
3138                          "the RGP documentation for the list of "
3139                          "supported GPUs!\n");
3140          abort();
3141       }
3142 
3143       if (!radv_thread_trace_init(device))
3144          goto fail;
3145    }
3146 
3147    if (getenv("RADV_TRAP_HANDLER")) {
3148       /* TODO: Add support for more hardware. */
3149       assert(device->physical_device->rad_info.chip_class == GFX8);
3150 
3151       fprintf(stderr, "**********************************************************************\n");
3152       fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
3153       fprintf(stderr, "**********************************************************************\n");
3154 
3155       /* To get the disassembly of the faulty shaders, we have to
3156        * keep some shader info around.
3157        */
3158       keep_shader_info = true;
3159 
3160       if (!radv_trap_handler_init(device))
3161          goto fail;
3162    }
3163 
3164    if (getenv("RADV_FORCE_VRS")) {
3165       const char *vrs_rates = getenv("RADV_FORCE_VRS");
3166 
3167       if (device->physical_device->rad_info.chip_class < GFX10_3)
3168          fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
3169       else if (!strcmp(vrs_rates, "2x2"))
3170          device->force_vrs = RADV_FORCE_VRS_2x2;
3171       else if (!strcmp(vrs_rates, "2x1"))
3172          device->force_vrs = RADV_FORCE_VRS_2x1;
3173       else if (!strcmp(vrs_rates, "1x2"))
3174          device->force_vrs = RADV_FORCE_VRS_1x2;
3175       else
3176          fprintf(stderr, "radv: Invalid VRS rates specified "
3177                          "(valid values are 2x2, 2x1 and 1x2)\n");
3178    }
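   /* Example (shell, illustrative): force 2x2 coarse shading on RDNA2+ with
    *    RADV_FORCE_VRS=2x2 ./app
    */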
3179 
3180    device->adjust_frag_coord_z =
3181       (device->vk.enabled_extensions.KHR_fragment_shading_rate ||
3182        device->force_vrs != RADV_FORCE_VRS_NONE) &&
3183       (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
3184        device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
3185        device->physical_device->rad_info.family == CHIP_VANGOGH);
3186 
3187    device->keep_shader_info = keep_shader_info;
3188    result = radv_device_init_meta(device);
3189    if (result != VK_SUCCESS)
3190       goto fail;
3191 
3192    radv_device_init_msaa(device);
3193 
3194    /* If the border color extension is enabled, let's create the buffer we need. */
3195    if (custom_border_colors) {
3196       result = radv_device_init_border_color(device);
3197       if (result != VK_SUCCESS)
3198          goto fail;
3199    }
3200 
3201    if (vs_prologs) {
3202       result = radv_device_init_vs_prologs(device);
3203       if (result != VK_SUCCESS)
3204          goto fail;
3205    }
3206 
3207    for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
3208       device->empty_cs[family] = device->ws->cs_create(device->ws, family);
3209       if (!device->empty_cs[family])
3210          goto fail;
3211 
3212       switch (family) {
3213       case RADV_QUEUE_GENERAL:
3214          radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
3215          radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
3216          radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
3217          break;
3218       case RADV_QUEUE_COMPUTE:
3219          radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
3220          radeon_emit(device->empty_cs[family], 0);
3221          break;
3222       }
3223 
3224       result = device->ws->cs_finalize(device->empty_cs[family]);
3225       if (result != VK_SUCCESS)
3226          goto fail;
3227    }
3228 
3229    if (device->physical_device->rad_info.chip_class >= GFX7)
3230       cik_create_gfx_config(device);
3231 
3232    VkPipelineCacheCreateInfo ci;
3233    ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
3234    ci.pNext = NULL;
3235    ci.flags = 0;
3236    ci.pInitialData = NULL;
3237    ci.initialDataSize = 0;
3238    VkPipelineCache pc;
3239    result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
3240    if (result != VK_SUCCESS)
3241       goto fail_meta;
3242 
3243    device->mem_cache = radv_pipeline_cache_from_handle(pc);
3244 
3245    if (u_cnd_monotonic_init(&device->timeline_cond)) {
3246       result = VK_ERROR_INITIALIZATION_FAILED;
3247       goto fail_mem_cache;
3248    }
3249 
3250    device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
3251    if (device->force_aniso >= 0) {
3252       fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
3253               1 << util_logbase2(device->force_aniso));
3254    }
3255 
3256    *pDevice = radv_device_to_handle(device);
3257    return VK_SUCCESS;
3258 
3259 fail_mem_cache:
3260    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3261 fail_meta:
3262    radv_device_finish_meta(device);
3263 fail:
3264    radv_thread_trace_finish(device);
3265    free(device->thread_trace.trigger_file);
3266 
3267    radv_trap_handler_finish(device);
3268    radv_finish_trace(device);
3269 
3270    if (device->gfx_init)
3271       device->ws->buffer_destroy(device->ws, device->gfx_init);
3272 
3273    radv_device_finish_vs_prologs(device);
3274    radv_device_finish_border_color(device);
3275 
3276    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3277       for (unsigned q = 0; q < device->queue_count[i]; q++)
3278          radv_queue_finish(&device->queues[i][q]);
3279       if (device->queue_count[i])
3280          vk_free(&device->vk.alloc, device->queues[i]);
3281    }
3282 
3283    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3284       if (device->hw_ctx[i])
3285          device->ws->ctx_destroy(device->hw_ctx[i]);
3286    }
3287 
3288    vk_device_finish(&device->vk);
3289    vk_free(&device->vk.alloc, device);
3290    return result;
3291 }
3292 
3293 void
3294 radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
3295 {
3296    RADV_FROM_HANDLE(radv_device, device, _device);
3297 
3298    if (!device)
3299       return;
3300 
3301    if (device->gfx_init)
3302       device->ws->buffer_destroy(device->ws, device->gfx_init);
3303 
3304    radv_device_finish_vs_prologs(device);
3305    radv_device_finish_border_color(device);
3306    radv_device_finish_vrs_image(device);
3307 
3308    for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
3309       for (unsigned q = 0; q < device->queue_count[i]; q++)
3310          radv_queue_finish(&device->queues[i][q]);
3311       if (device->queue_count[i])
3312          vk_free(&device->vk.alloc, device->queues[i]);
3313       if (device->empty_cs[i])
3314          device->ws->cs_destroy(device->empty_cs[i]);
3315    }
3316 
3317    for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
3318       if (device->hw_ctx[i])
3319          device->ws->ctx_destroy(device->hw_ctx[i]);
3320    }
3321 
3322    radv_device_finish_meta(device);
3323 
3324    VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
3325    radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
3326 
3327    radv_trap_handler_finish(device);
3328    radv_finish_trace(device);
3329 
3330    radv_destroy_shader_arenas(device);
3331 
3332    u_cnd_monotonic_destroy(&device->timeline_cond);
3333 
3334    free(device->thread_trace.trigger_file);
3335    radv_thread_trace_finish(device);
3336 
3337    vk_device_finish(&device->vk);
3338    vk_free(&device->vk.alloc, device);
3339 }
3340 
3341 VkResult
3342 radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
3343 {
3344    if (pProperties == NULL) {
3345       *pPropertyCount = 0;
3346       return VK_SUCCESS;
3347    }
3348 
3349    /* None supported at this time */
3350    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3351 }
3352 
3353 VkResult
3354 radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
3355                                     VkLayerProperties *pProperties)
3356 {
3357    if (pProperties == NULL) {
3358       *pPropertyCount = 0;
3359       return VK_SUCCESS;
3360    }
3361 
3362    /* None supported at this time */
3363    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
3364 }
3365 
3366 static void
3367 fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,
3368                      uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
3369                      uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
3370                      uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,
3371                      uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)
3372 {
3373    uint32_t *desc = &map[4];
3374 
3375    if (esgs_ring_bo) {
3376       uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);
3377 
3378       /* stride 0, num records - size, add tid, swizzle, elsize4,
3379          index stride 64 */
3380       desc[0] = esgs_va;
3381       desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);
3382       desc[2] = esgs_ring_size;
3383       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3384                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3385                 S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);
3386 
3387       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3388          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3389                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3390       } else {
3391          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3392                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3393       }
3394 
3395       /* GS entry for ES->GS ring */
3396       /* stride 0, num records - size, elsize0,
3397          index stride 0 */
3398       desc[4] = esgs_va;
3399       desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
3400       desc[6] = esgs_ring_size;
3401       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3402                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3403 
3404       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3405          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3406                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3407       } else {
3408          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3409                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3410       }
3411    }
3412 
3413    desc += 8;
3414 
3415    if (gsvs_ring_bo) {
3416       uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);
3417 
3418       /* VS entry for GS->VS ring */
3419       /* stride 0, num records - size, elsize0,
3420          index stride 0 */
3421       desc[0] = gsvs_va;
3422       desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
3423       desc[2] = gsvs_ring_size;
3424       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3425                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3426 
3427       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3428          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3429                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3430       } else {
3431          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3432                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3433       }
3434 
3435       /* stride gsvs_itemsize, num records 64
3436          elsize 4, index stride 16 */
3437       /* shader will patch stride and desc[2] */
3438       desc[4] = gsvs_va;
3439       desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
3440       desc[6] = 0;
3441       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3442                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
3443                 S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);
3444 
3445       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3446          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3447                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
3448       } else {
3449          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3450                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
3451       }
3452    }
3453 
3454    desc += 8;
3455 
3456    if (tess_rings_bo) {
3457       uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
3458       uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;
3459 
3460       desc[0] = tess_va;
3461       desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
3462       desc[2] = tess_factor_ring_size;
3463       desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3464                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3465 
3466       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3467          desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3468                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3469       } else {
3470          desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3471                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3472       }
3473 
3474       desc[4] = tess_offchip_va;
3475       desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
3476       desc[6] = tess_offchip_ring_size;
3477       desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3478                 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3479 
3480       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3481          desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3482                     S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3483       } else {
3484          desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
3485                     S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3486       }
3487    }
3488 
3489    desc += 8;
3490 
3491    if (add_sample_positions) {
3492       /* add sample positions after all rings */
3493       memcpy(desc, queue->device->sample_locations_1x, 8);
3494       desc += 2;
3495       memcpy(desc, queue->device->sample_locations_2x, 16);
3496       desc += 4;
3497       memcpy(desc, queue->device->sample_locations_4x, 32);
3498       desc += 8;
3499       memcpy(desc, queue->device->sample_locations_8x, 64);
3500    }
3501 }
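/* Layout note: each entry written above is a 4-dword buffer descriptor and
 * every ring gets a pair of views, so starting at map[4] the order is:
 * ES->GS pair, GS->VS pair, tess factor ring + offchip ring, then (when
 * requested) the 1x/2x/4x/8x sample positions.
 */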
3502 
3503 static unsigned
3504 radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
3505 {
3506    bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
3507                                  device->physical_device->rad_info.family != CHIP_CARRIZO &&
3508                                  device->physical_device->rad_info.family != CHIP_STONEY;
3509    unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
3510    unsigned max_offchip_buffers;
3511    unsigned offchip_granularity;
3512    unsigned hs_offchip_param;
3513 
3514    /*
3515     * Per RadeonSI:
3516     * This must be one less than the maximum number due to a hw limitation.
3517     * Various hardware bugs need this.
3518     *
3519     * Per AMDVLK:
3520     * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
3521     * Gfx7 should limit max_offchip_buffers to 508
3522     * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
3523     *
3524     * Follow AMDVLK here.
3525     */
3526    if (device->physical_device->rad_info.chip_class >= GFX10) {
3527       max_offchip_buffers_per_se = 128;
3528    } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
3529               device->physical_device->rad_info.chip_class == GFX7 ||
3530               device->physical_device->rad_info.chip_class == GFX6)
3531       --max_offchip_buffers_per_se;
3532 
3533    max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;
3534 
3535    /* Hawaii has a bug with offchip buffers > 256 that can be worked
3536     * around by setting 4K granularity.
3537     */
3538    if (device->tess_offchip_block_dw_size == 4096) {
3539       assert(device->physical_device->rad_info.family == CHIP_HAWAII);
3540       offchip_granularity = V_03093C_X_4K_DWORDS;
3541    } else {
3542       assert(device->tess_offchip_block_dw_size == 8192);
3543       offchip_granularity = V_03093C_X_8K_DWORDS;
3544    }
3545 
3546    switch (device->physical_device->rad_info.chip_class) {
3547    case GFX6:
3548       max_offchip_buffers = MIN2(max_offchip_buffers, 126);
3549       break;
3550    case GFX7:
3551    case GFX8:
3552    case GFX9:
3553       max_offchip_buffers = MIN2(max_offchip_buffers, 508);
3554       break;
3555    case GFX10:
3556       break;
3557    default:
3558       break;
3559    }
3560 
3561    *max_offchip_buffers_p = max_offchip_buffers;
3562    if (device->physical_device->rad_info.chip_class >= GFX10_3) {
3563       hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
3564                          S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
3565    } else if (device->physical_device->rad_info.chip_class >= GFX7) {
3566       if (device->physical_device->rad_info.chip_class >= GFX8)
3567          --max_offchip_buffers;
3568       hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
3569                          S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
3570    } else {
3571       hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
3572    }
3573    return hs_offchip_param;
3574 }
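/* Worked example: on Vega10 (GFX9, max_se = 4) the per-SE count starts at
 * 128 with double offchip buffers, drops to 127 for the Vega10 limit, giving
 * 4 * 127 = 508 buffers (also the GFX9 clamp). The caller sees 508; since
 * GFX8+ programs the field minus one, OFFCHIP_BUFFERING is written as 507.
 */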
3575 
3576 static void
3577 radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3578                         struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
3579                         struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
3580 {
3581    if (!esgs_ring_bo && !gsvs_ring_bo)
3582       return;
3583 
3584    if (esgs_ring_bo)
3585       radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);
3586 
3587    if (gsvs_ring_bo)
3588       radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);
3589 
3590    if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3591       radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
3592       radeon_emit(cs, esgs_ring_size >> 8);
3593       radeon_emit(cs, gsvs_ring_size >> 8);
3594    } else {
3595       radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
3596       radeon_emit(cs, esgs_ring_size >> 8);
3597       radeon_emit(cs, gsvs_ring_size >> 8);
3598    }
3599 }
3600 
3601 static void
3602 radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3603                            unsigned hs_offchip_param, unsigned tf_ring_size,
3604                            struct radeon_winsys_bo *tess_rings_bo)
3605 {
3606    uint64_t tf_va;
3607 
3608    if (!tess_rings_bo)
3609       return;
3610 
3611    tf_va = radv_buffer_get_va(tess_rings_bo);
3612 
3613    radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);
3614 
3615    if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
3616       radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));
3617       radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);
3618 
3619       if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3620          radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
3621                                 S_030984_BASE_HI(tf_va >> 40));
3622       } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3623          radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
3624       }
3625       radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
3626    } else {
3627       radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));
3628       radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
3629       radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
3630    }
3631 }
3632 
3633 static void
3634 radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3635                            uint32_t size_per_wave, uint32_t waves,
3636                            struct radeon_winsys_bo *scratch_bo)
3637 {
3638    if (queue->vk.queue_family_index != RADV_QUEUE_GENERAL)
3639       return;
3640 
3641    if (!scratch_bo)
3642       return;
3643 
3644    radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3645 
3646    radeon_set_context_reg(
3647       cs, R_0286E8_SPI_TMPRING_SIZE,
3648       S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3649 }
3650 
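/* The two scratch helpers below program SPI_TMPRING_SIZE and
 * COMPUTE_TMPRING_SIZE; the WAVESIZE field expects the per-wave scratch size
 * in 1 KiB units, which is what round_up_u32(size_per_wave, 1024) yields. */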
3651 static void
3652 radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3653                           uint32_t size_per_wave, uint32_t waves,
3654                           struct radeon_winsys_bo *compute_scratch_bo)
3655 {
3656    uint64_t scratch_va;
3657 
3658    if (!compute_scratch_bo)
3659       return;
3660 
3661    scratch_va = radv_buffer_get_va(compute_scratch_bo);
3662 
3663    radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);
3664 
3665    radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
3666    radeon_emit(cs, scratch_va);
3667    radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));
3668 
3669    radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
3670                      S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
3671 }
3672 
3673 static void
3674 radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,
3675                                  struct radeon_winsys_bo *descriptor_bo)
3676 {
3677    uint64_t va;
3678 
3679    if (!descriptor_bo)
3680       return;
3681 
3682    va = radv_buffer_get_va(descriptor_bo);
3683 
3684    radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);
3685 
3686    if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
3687       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
3688                          R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3689                          R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3690 
3691       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3692          radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
3693       }
3694    } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
3695       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
3696                          R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
3697                          R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};
3698 
3699       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3700          radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
3701       }
3702    } else {
3703       uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
3704                          R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
3705                          R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};
3706 
3707       for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
3708          radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
3709       }
3710    }
3711 }
3712 
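/* Replay the device's recorded graphics init IB (chained via INDIRECT_BUFFER)
 * when one exists, otherwise emit the default graphics register state
 * directly into this cs. */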
3713 static void
3714 radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3715 {
3716    struct radv_device *device = queue->device;
3717 
3718    if (device->gfx_init) {
3719       uint64_t va = radv_buffer_get_va(device->gfx_init);
3720 
3721       radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
3722       radeon_emit(cs, va);
3723       radeon_emit(cs, va >> 32);
3724       radeon_emit(cs, device->gfx_init_size_dw & 0xffff);
3725 
3726       radv_cs_add_buffer(device->ws, cs, device->gfx_init);
3727    } else {
3728       si_emit_graphics(device, cs);
3729    }
3730 }
3731 
3732 static void
3733 radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
3734 {
3735    si_emit_compute(queue->device, cs);
3736 }
3737 
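/* Build (or reuse) the per-queue preamble command streams. Ring and scratch
 * buffers only ever grow; if the cached preambles already cover the requested
 * sizes, they are returned as-is. Three streams are produced: an initial
 * preamble with a full flush, an initial preamble with cache invalidations
 * only, and a continue preamble with no flush at all. */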
3738 static VkResult
3739 radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
3740                      uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,
3741                      uint32_t compute_scratch_waves, uint32_t esgs_ring_size,
3742                      uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,
3743                      bool needs_gds_oa, bool needs_sample_positions,
3744                      struct radeon_cmdbuf **initial_full_flush_preamble_cs,
3745                      struct radeon_cmdbuf **initial_preamble_cs,
3746                      struct radeon_cmdbuf **continue_preamble_cs)
3747 {
3748    struct radeon_winsys_bo *scratch_bo = NULL;
3749    struct radeon_winsys_bo *descriptor_bo = NULL;
3750    struct radeon_winsys_bo *compute_scratch_bo = NULL;
3751    struct radeon_winsys_bo *esgs_ring_bo = NULL;
3752    struct radeon_winsys_bo *gsvs_ring_bo = NULL;
3753    struct radeon_winsys_bo *tess_rings_bo = NULL;
3754    struct radeon_winsys_bo *gds_bo = NULL;
3755    struct radeon_winsys_bo *gds_oa_bo = NULL;
3756    struct radeon_cmdbuf *dest_cs[3] = {0};
3757    bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
3758    unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
3759    unsigned max_offchip_buffers;
3760    unsigned hs_offchip_param = 0;
3761    unsigned tess_offchip_ring_offset;
3762    uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
3763    VkResult result = VK_SUCCESS;
3764    if (!queue->has_tess_rings) {
3765       if (needs_tess_rings)
3766          add_tess_rings = true;
3767    }
3768    if (!queue->has_gds) {
3769       if (needs_gds)
3770          add_gds = true;
3771    }
3772    if (!queue->has_gds_oa) {
3773       if (needs_gds_oa)
3774          add_gds_oa = true;
3775    }
3776    if (!queue->has_sample_positions) {
3777       if (needs_sample_positions)
3778          add_sample_positions = true;
3779    }
3780    tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
3781    hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);
3782    tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
3783    tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;
3784 
3785    scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
3786    if (scratch_size_per_wave)
3787       scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
3788    else
3789       scratch_waves = 0;
3790 
3791    compute_scratch_size_per_wave =
3792       MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
3793    if (compute_scratch_size_per_wave)
3794       compute_scratch_waves =
3795          MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
3796    else
3797       compute_scratch_waves = 0;
3798 
3799    if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
3800        scratch_waves <= queue->scratch_waves &&
3801        compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
3802        compute_scratch_waves <= queue->compute_scratch_waves &&
3803        esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&
3804        !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
3805        queue->initial_preamble_cs) {
3806       *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
3807       *initial_preamble_cs = queue->initial_preamble_cs;
3808       *continue_preamble_cs = queue->continue_preamble_cs;
3809       if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
3810           !gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&
3811           !needs_sample_positions)
3812          *continue_preamble_cs = NULL;
3813       return VK_SUCCESS;
3814    }
3815 
3816    uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
3817    uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
3818    if (scratch_size > queue_scratch_size) {
3819       result =
3820          queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
3821                                           ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
3822       if (result != VK_SUCCESS)
3823          goto fail;
3824    } else
3825       scratch_bo = queue->scratch_bo;
3826 
3827    uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
3828    uint32_t compute_queue_scratch_size =
3829       queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
3830    if (compute_scratch_size > compute_queue_scratch_size) {
3831       result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,
3832                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
3833                                                 RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
3834       if (result != VK_SUCCESS)
3835          goto fail;
3836 
3837    } else
3838       compute_scratch_bo = queue->compute_scratch_bo;
3839 
3840    if (esgs_ring_size > queue->esgs_ring_size) {
3841       result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
3842                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
3843                                                 RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
3844       if (result != VK_SUCCESS)
3845          goto fail;
3846    } else {
3847       esgs_ring_bo = queue->esgs_ring_bo;
3848       esgs_ring_size = queue->esgs_ring_size;
3849    }
3850 
3851    if (gsvs_ring_size > queue->gsvs_ring_size) {
3852       result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
3853                                                 RADEON_DOMAIN_VRAM, ring_bo_flags,
3854                                                 RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
3855       if (result != VK_SUCCESS)
3856          goto fail;
3857    } else {
3858       gsvs_ring_bo = queue->gsvs_ring_bo;
3859       gsvs_ring_size = queue->gsvs_ring_size;
3860    }
3861 
3862    if (add_tess_rings) {
3863       result = queue->device->ws->buffer_create(
3864          queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
3865          RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
3866       if (result != VK_SUCCESS)
3867          goto fail;
3868    } else {
3869       tess_rings_bo = queue->tess_rings_bo;
3870    }
3871 
3872    if (add_gds) {
3873       assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3874 
3875       /* 4 streamout GDS counters.
3876        * We need 256B (64 dw) of GDS, otherwise streamout hangs.
3877        */
3878       result =
3879          queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
3880                                           ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
3881       if (result != VK_SUCCESS)
3882          goto fail;
3883    } else {
3884       gds_bo = queue->gds_bo;
3885    }
3886 
3887    if (add_gds_oa) {
3888       assert(queue->device->physical_device->rad_info.chip_class >= GFX10);
3889 
3890       result =
3891          queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
3892                                           RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
3893       if (result != VK_SUCCESS)
3894          goto fail;
3895    } else {
3896       gds_oa_bo = queue->gds_oa_bo;
3897    }
3898 
3899    if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
3900        gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
3901        add_sample_positions) {
3902       uint32_t size = 0;
3903       if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
3904          size = 112; /* 2-dword scratch descriptor + 2 dwords of padding + 6 ring descriptors * 4 dwords */
3905          if (add_sample_positions)
3906             size += 128; /* sample positions: 64+32+16+8 = 120 bytes, rounded up to 128 */
3907       } else if (scratch_bo)
3908          size = 8; /* 2 dword */
3909 
3910       result = queue->device->ws->buffer_create(
3911          queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
3912          RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
3913          RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
3914       if (result != VK_SUCCESS)
3915          goto fail;
3916    } else
3917       descriptor_bo = queue->descriptor_bo;
3918 
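   /* Upload the descriptors: map[0..1] hold the graphics scratch buffer
    * descriptor (VA plus a swizzle-enabled rsrc word); the ring and
    * sample-position descriptors that follow are written by
    * fill_geom_tess_rings(). */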
3919    if (descriptor_bo != queue->descriptor_bo) {
3920       uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);
3921       if (!map) {
3922          result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
              goto fail;
           }
3923 
3924       if (scratch_bo) {
3925          uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
3926          uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
3927          map[0] = scratch_va;
3928          map[1] = rsrc1;
3929       }
3930 
3931       if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
3932          fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,
3933                               gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,
3934                               tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);
3935 
3936       queue->device->ws->buffer_unmap(descriptor_bo);
3937    }
3938 
3939    for (int i = 0; i < 3; ++i) {
3940       enum rgp_flush_bits sqtt_flush_bits = 0;
3941       struct radeon_cmdbuf *cs = NULL;
3942       cs = queue->device->ws->cs_create(queue->device->ws,
3943                                         queue->vk.queue_family_index ? RING_COMPUTE : RING_GFX);
3944       if (!cs) {
3945          result = VK_ERROR_OUT_OF_HOST_MEMORY;
3946          goto fail;
3947       }
3948 
3949       dest_cs[i] = cs;
3950 
3951       if (scratch_bo)
3952          radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);
3953 
3954       /* Emit initial configuration. */
3955       switch (queue->vk.queue_family_index) {
3956       case RADV_QUEUE_GENERAL:
3957          radv_init_graphics_state(cs, queue);
3958          break;
3959       case RADV_QUEUE_COMPUTE:
3960          radv_init_compute_state(cs, queue);
3961          break;
3962       case RADV_QUEUE_TRANSFER:
3963          break;
3964       }
3965 
3966       if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
3967          radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3968          radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));
3969 
3970          radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
3971          radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
3972       }
3973 
3974       radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,
3975                               gsvs_ring_size);
3976       radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);
3977       radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
3978       radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,
3979                                 compute_scratch_bo);
3980       radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);
3981 
3982       if (gds_bo)
3983          radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
3984       if (gds_oa_bo)
3985          radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);
3986 
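      /* dest_cs[0] is the full-flush preamble (CS/PS partial flushes on top
       * of the cache invalidations), dest_cs[1] only invalidates caches, and
       * dest_cs[2] (the continue preamble) flushes nothing. */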
3987       if (i == 0) {
3988          si_cs_emit_cache_flush(
3989             cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
3990             queue->vk.queue_family_index == RADV_QUEUE_COMPUTE &&
3991                queue->device->physical_device->rad_info.chip_class >= GFX7,
3992             (queue->vk.queue_family_index == RADV_QUEUE_COMPUTE
3993                 ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
3994                 : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
3995                RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
3996                RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,
3997             &sqtt_flush_bits, 0);
3998       } else if (i == 1) {
3999          si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
4000                                 queue->vk.queue_family_index == RADV_QUEUE_COMPUTE &&
4001                                    queue->device->physical_device->rad_info.chip_class >= GFX7,
4002                                 RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
4003                                    RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
4004                                    RADV_CMD_FLAG_START_PIPELINE_STATS,
4005                                 &sqtt_flush_bits, 0);
4006       }
4007 
4008       result = queue->device->ws->cs_finalize(cs);
4009       if (result != VK_SUCCESS)
4010          goto fail;
4011    }
4012 
4013    if (queue->initial_full_flush_preamble_cs)
4014       queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
4015 
4016    if (queue->initial_preamble_cs)
4017       queue->device->ws->cs_destroy(queue->initial_preamble_cs);
4018 
4019    if (queue->continue_preamble_cs)
4020       queue->device->ws->cs_destroy(queue->continue_preamble_cs);
4021 
4022    queue->initial_full_flush_preamble_cs = dest_cs[0];
4023    queue->initial_preamble_cs = dest_cs[1];
4024    queue->continue_preamble_cs = dest_cs[2];
4025 
4026    if (scratch_bo != queue->scratch_bo) {
4027       if (queue->scratch_bo)
4028          queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
4029       queue->scratch_bo = scratch_bo;
4030    }
4031    queue->scratch_size_per_wave = scratch_size_per_wave;
4032    queue->scratch_waves = scratch_waves;
4033 
4034    if (compute_scratch_bo != queue->compute_scratch_bo) {
4035       if (queue->compute_scratch_bo)
4036          queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
4037       queue->compute_scratch_bo = compute_scratch_bo;
4038    }
4039    queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
4040    queue->compute_scratch_waves = compute_scratch_waves;
4041 
4042    if (esgs_ring_bo != queue->esgs_ring_bo) {
4043       if (queue->esgs_ring_bo)
4044          queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
4045       queue->esgs_ring_bo = esgs_ring_bo;
4046       queue->esgs_ring_size = esgs_ring_size;
4047    }
4048 
4049    if (gsvs_ring_bo != queue->gsvs_ring_bo) {
4050       if (queue->gsvs_ring_bo)
4051          queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
4052       queue->gsvs_ring_bo = gsvs_ring_bo;
4053       queue->gsvs_ring_size = gsvs_ring_size;
4054    }
4055 
4056    if (tess_rings_bo != queue->tess_rings_bo) {
4057       queue->tess_rings_bo = tess_rings_bo;
4058       queue->has_tess_rings = true;
4059    }
4060 
4061    if (gds_bo != queue->gds_bo) {
4062       queue->gds_bo = gds_bo;
4063       queue->has_gds = true;
4064    }
4065 
4066    if (gds_oa_bo != queue->gds_oa_bo) {
4067       queue->gds_oa_bo = gds_oa_bo;
4068       queue->has_gds_oa = true;
4069    }
4070 
4071    if (descriptor_bo != queue->descriptor_bo) {
4072       if (queue->descriptor_bo)
4073          queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
4074 
4075       queue->descriptor_bo = descriptor_bo;
4076    }
4077 
4078    if (add_sample_positions)
4079       queue->has_sample_positions = true;
4080 
4081    *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
4082    *initial_preamble_cs = queue->initial_preamble_cs;
4083    *continue_preamble_cs = queue->continue_preamble_cs;
4084    if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
4085       *continue_preamble_cs = NULL;
4086    return VK_SUCCESS;
4087 fail:
4088    for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
4089       if (dest_cs[i])
4090          queue->device->ws->cs_destroy(dest_cs[i]);
4091    if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
4092       queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
4093    if (scratch_bo && scratch_bo != queue->scratch_bo)
4094       queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
4095    if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
4096       queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
4097    if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
4098       queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
4099    if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
4100       queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
4101    if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
4102       queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
4103    if (gds_bo && gds_bo != queue->gds_bo)
4104       queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
4105    if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
4106       queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);
4107 
4108    return vk_error(queue, result);
4109 }
4110 
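/* Convert the semaphore parts (plus the optional fence) of a submission into
 * the flat syncobj/timeline-point arrays consumed by the winsys: one pass to
 * count, one allocation, one pass to fill. */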
4111 static VkResult
4112 radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,
4113                       int num_sems, struct radv_semaphore_part **sems,
4114                       const uint64_t *timeline_values, VkFence _fence, bool is_signal)
4115 {
4116    int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;
4117 
4118    if (num_sems == 0 && _fence == VK_NULL_HANDLE)
4119       return VK_SUCCESS;
4120 
4121    for (uint32_t i = 0; i < num_sems; i++) {
4122       switch (sems[i]->kind) {
4123       case RADV_SEMAPHORE_SYNCOBJ:
4124          counts->syncobj_count++;
4125          counts->syncobj_reset_count++;
4126          break;
4127       case RADV_SEMAPHORE_NONE:
4128          break;
4129       case RADV_SEMAPHORE_TIMELINE:
4130          counts->syncobj_count++;
4131          break;
4132       case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4133          counts->timeline_syncobj_count++;
4134          break;
4135       }
4136    }
4137 
4138    if (_fence != VK_NULL_HANDLE)
4139       counts->syncobj_count++;
4140 
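   /* One allocation for both arrays: the timeline points come first, followed
    * by all syncobj handles (binary ones, then timeline ones). */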
4141    if (counts->syncobj_count || counts->timeline_syncobj_count) {
4142       counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +
4143                                           (sizeof(*counts->syncobj) + sizeof(*counts->points)) *
4144                                              counts->timeline_syncobj_count);
4145       if (!counts->points)
4146          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
4147       counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);
4148    }
4149 
4150    non_reset_idx = counts->syncobj_reset_count;
4151 
4152    for (uint32_t i = 0; i < num_sems; i++) {
4153       switch (sems[i]->kind) {
4154       case RADV_SEMAPHORE_NONE:
4155          unreachable("Empty semaphore");
4156          break;
4157       case RADV_SEMAPHORE_SYNCOBJ:
4158          counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
4159          break;
4160       case RADV_SEMAPHORE_TIMELINE: {
4161          mtx_lock(&sems[i]->timeline.mutex);
4162          struct radv_timeline_point *point = NULL;
4163          if (is_signal) {
4164             point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
4165          } else {
4166             point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,
4167                                                              timeline_values[i]);
4168          }
4169 
4170          mtx_unlock(&sems[i]->timeline.mutex);
4171 
4172          if (point) {
4173             counts->syncobj[non_reset_idx++] = point->syncobj;
4174          } else {
4175             /* Explicitly clear the semaphore so that we do not try to
4176              * look up a point for it again post-submit. */
4177             sems[i] = NULL;
4178          }
4179          break;
4180       }
4181       case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
4182          counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
4183          counts->points[timeline_idx] = timeline_values[i];
4184          ++timeline_idx;
4185          break;
4186       }
4187    }
4188 
4189    if (_fence != VK_NULL_HANDLE) {
4190       RADV_FROM_HANDLE(radv_fence, fence, _fence);
4191 
4192       struct radv_fence_part *part =
4193          fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
4194       counts->syncobj[non_reset_idx++] = part->syncobj;
4195    }
4196 
4197    assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
4198    counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);
4199 
4200    return VK_SUCCESS;
4201 }
4202 
4203 static void
4204 radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
4205 {
4206    free(sem_info->wait.points);
4207    free(sem_info->signal.points);
4208 }
4209 
4210 static void
4211 radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)
4212 {
4213    for (uint32_t i = 0; i < num_sems; i++) {
4214       radv_destroy_semaphore_part(device, sems + i);
4215    }
4216 }
4217 
4218 static VkResult
4219 radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,
4220                     int num_wait_sems, struct radv_semaphore_part **wait_sems,
4221                     const uint64_t *wait_values, int num_signal_sems,
4222                     struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,
4223                     VkFence fence)
4224 {
4225    VkResult ret;
4226 
4227    ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,
4228                                VK_NULL_HANDLE, false);
4229    if (ret)
4230       return ret;
4231    ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,
4232                                signal_values, fence, true);
4233    if (ret)
4234       radv_free_sem_info(sem_info);
4235 
4236    /* caller can override these */
4237    sem_info->cs_emit_wait = true;
4238    sem_info->cs_emit_signal = true;
4239    return ret;
4240 }
4241 
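/* Called once the winsys submission went through: drop the two references
 * taken on each timeline point at submit time, advance highest_submitted /
 * max_point on the signaled timelines, and wake submissions blocked on
 * them. */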
4242 static void
4243 radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,
4244                         struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,
4245                         uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,
4246                         const uint64_t *signal_values, struct list_head *processing_list)
4247 {
4248    for (uint32_t i = 0; i < num_wait_sems; ++i) {
4249       if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4250          mtx_lock(&wait_sems[i]->timeline.mutex);
4251          struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
4252             device, &wait_sems[i]->timeline, wait_values[i]);
4253          point->wait_count -= 2;
4254          mtx_unlock(&wait_sems[i]->timeline.mutex);
4255       }
4256    }
4257    for (uint32_t i = 0; i < num_signal_sems; ++i) {
4258       if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4259          mtx_lock(&signal_sems[i]->timeline.mutex);
4260          struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
4261             device, &signal_sems[i]->timeline, signal_values[i]);
4262          signal_sems[i]->timeline.highest_submitted =
4263             MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
4264          point->wait_count -= 2;
4265          radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
4266          mtx_unlock(&signal_sems[i]->timeline.mutex);
4267       } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
4268          signal_sems[i]->timeline_syncobj.max_point =
4269             MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
4270       }
4271    }
4272 }
4273 
4274 static VkResult
4275 radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
4276 {
4277    RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
4278    VkResult result;
4279 
4280    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4281       struct radv_device_memory *mem = NULL;
4282 
4283       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4284          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4285 
4286       result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
4287                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4288                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4289       if (result != VK_SUCCESS)
4290          return result;
4291    }
4292 
4293    return VK_SUCCESS;
4294 }
4295 
4296 static VkResult
4297 radv_sparse_image_opaque_bind_memory(struct radv_device *device,
4298                                      const VkSparseImageOpaqueMemoryBindInfo *bind)
4299 {
4300    RADV_FROM_HANDLE(radv_image, image, bind->image);
4301    VkResult result;
4302 
4303    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4304       struct radv_device_memory *mem = NULL;
4305 
4306       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4307          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4308 
4309       result = device->ws->buffer_virtual_bind(device->ws, image->bo,
4310                                                bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
4311                                                mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
4312       if (result != VK_SUCCESS)
4313          return result;
4314    }
4315 
4316    return VK_SUCCESS;
4317 }
4318 
4319 static VkResult
4320 radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
4321 {
4322    RADV_FROM_HANDLE(radv_image, image, bind->image);
4323    struct radeon_surf *surface = &image->planes[0].surface;
4324    uint32_t bs = vk_format_get_blocksize(image->vk_format);
4325    VkResult result;
4326 
4327    for (uint32_t i = 0; i < bind->bindCount; ++i) {
4328       struct radv_device_memory *mem = NULL;
4329       uint32_t offset, pitch;
4330       uint32_t mem_offset = bind->pBinds[i].memoryOffset;
4331       const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
4332       const uint32_t level = bind->pBinds[i].subresource.mipLevel;
4333 
4334       VkExtent3D bind_extent = bind->pBinds[i].extent;
4335       bind_extent.width =
4336          DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
4337       bind_extent.height =
4338          DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));
4339 
4340       VkOffset3D bind_offset = bind->pBinds[i].offset;
4341       bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
4342       bind_offset.y /= vk_format_get_blockheight(image->vk_format);
4343 
4344       if (bind->pBinds[i].memory != VK_NULL_HANDLE)
4345          mem = radv_device_memory_from_handle(bind->pBinds[i].memory);
4346 
4347       if (device->physical_device->rad_info.chip_class >= GFX9) {
4348          offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
4349          pitch = surface->u.gfx9.prt_level_pitch[level];
4350       } else {
4351          offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
4352                   surface->u.legacy.level[level].slice_size_dw * 4 * layer;
4353          pitch = surface->u.legacy.level[level].nblk_x;
4354       }
4355 
4356       offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);
4357 
4358       uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);
4359 
4360       bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;
4361 
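      /* If the bound region covers full rows of the mip level, one virtual
       * bind is enough; otherwise bind one row of tiles per iteration, with
       * the image side advancing by the level pitch and the memory side by
       * the aligned bound width. */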
4362       if (whole_subres) {
4363          uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);
4364 
4365          uint32_t size = aligned_extent_width * aligned_extent_height * bs;
4366          result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
4367                                                   mem ? mem->bo : NULL, mem_offset);
4368          if (result != VK_SUCCESS)
4369             return result;
4370       } else {
4371          uint32_t img_increment = pitch * bs;
4372          uint32_t mem_increment = aligned_extent_width * bs;
4373          uint32_t size = mem_increment * surface->prt_tile_height;
4374          for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
4375             result = device->ws->buffer_virtual_bind(
4376                device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
4377                mem_offset + mem_increment * y);
4378             if (result != VK_SUCCESS)
4379                return result;
4380          }
4381       }
4382    }
4383 
4384    return VK_SUCCESS;
4385 }
4386 
4387 static VkResult
4388 radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,
4389                    uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,
4390                    struct radeon_cmdbuf **initial_preamble_cs,
4391                    struct radeon_cmdbuf **continue_preamble_cs)
4392 {
4393    uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
4394    uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
4395    uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
4396    bool tess_rings_needed = false;
4397    bool gds_needed = false;
4398    bool gds_oa_needed = false;
4399    bool sample_positions_needed = false;
4400 
4401    for (uint32_t j = 0; j < cmd_buffer_count; j++) {
4402       RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);
4403 
4404       scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
4405       waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
4406       compute_scratch_size_per_wave =
4407          MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
4408       compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);
4409       esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
4410       gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
4411       tess_rings_needed |= cmd_buffer->tess_rings_needed;
4412       gds_needed |= cmd_buffer->gds_needed;
4413       gds_oa_needed |= cmd_buffer->gds_oa_needed;
4414       sample_positions_needed |= cmd_buffer->sample_positions_needed;
4415    }
4416 
4417    return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
4418                                compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,
4419                                gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,
4420                                sample_positions_needed, initial_full_flush_preamble_cs,
4421                                initial_preamble_cs, continue_preamble_cs);
4422 }
4423 
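/* A queue submission that may have to wait on timeline semaphores before it
 * can run. Everything it references (command buffers, sparse binds, semaphore
 * parts, wait/signal values) is copied into one allocation so that the
 * submission can outlive the vkQueueSubmit call. */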
4424 struct radv_deferred_queue_submission {
4425    struct radv_queue *queue;
4426    VkCommandBuffer *cmd_buffers;
4427    uint32_t cmd_buffer_count;
4428 
4429    /* Sparse bindings that happen on a queue. */
4430    VkSparseBufferMemoryBindInfo *buffer_binds;
4431    uint32_t buffer_bind_count;
4432    VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4433    uint32_t image_opaque_bind_count;
4434    VkSparseImageMemoryBindInfo *image_binds;
4435    uint32_t image_bind_count;
4436 
4437    bool flush_caches;
4438    VkPipelineStageFlags wait_dst_stage_mask;
4439    struct radv_semaphore_part **wait_semaphores;
4440    uint32_t wait_semaphore_count;
4441    struct radv_semaphore_part **signal_semaphores;
4442    uint32_t signal_semaphore_count;
4443    VkFence fence;
4444 
4445    uint64_t *wait_values;
4446    uint64_t *signal_values;
4447 
4448    struct radv_semaphore_part *temporary_semaphore_parts;
4449    uint32_t temporary_semaphore_part_count;
4450 
4451    struct list_head queue_pending_list;
4452    uint32_t submission_wait_count;
4453    struct radv_timeline_waiter *wait_nodes;
4454 
4455    struct list_head processing_list;
4456 };
4457 
4458 struct radv_queue_submission {
4459    const VkCommandBuffer *cmd_buffers;
4460    uint32_t cmd_buffer_count;
4461 
4462    /* Sparse bindings that happen on a queue. */
4463    const VkSparseBufferMemoryBindInfo *buffer_binds;
4464    uint32_t buffer_bind_count;
4465    const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
4466    uint32_t image_opaque_bind_count;
4467    const VkSparseImageMemoryBindInfo *image_binds;
4468    uint32_t image_bind_count;
4469 
4470    bool flush_caches;
4471    VkPipelineStageFlags wait_dst_stage_mask;
4472    const VkSemaphore *wait_semaphores;
4473    uint32_t wait_semaphore_count;
4474    const VkSemaphore *signal_semaphores;
4475    uint32_t signal_semaphore_count;
4476    VkFence fence;
4477 
4478    const uint64_t *wait_values;
4479    uint32_t wait_value_count;
4480    const uint64_t *signal_values;
4481    uint32_t signal_value_count;
4482 };
4483 
4484 static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
4485                                               uint32_t decrement,
4486                                               struct list_head *processing_list);
4487 
4488 static VkResult
4489 radv_create_deferred_submission(struct radv_queue *queue,
4490                                 const struct radv_queue_submission *submission,
4491                                 struct radv_deferred_queue_submission **out)
4492 {
4493    struct radv_deferred_queue_submission *deferred = NULL;
4494    size_t size = sizeof(struct radv_deferred_queue_submission);
4495 
4496    uint32_t temporary_count = 0;
4497    for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4498       RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4499       if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
4500          ++temporary_count;
4501    }
4502 
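   /* Everything below is carved out of one flat allocation: the struct is
    * followed by the command buffer, sparse bind, semaphore pointer,
    * temporary semaphore part, wait/signal value and wait node arrays. The
    * pointer fixups after the calloc carve these out of the same block. */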
4503    size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
4504    size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
4505    size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
4506    size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);
4507 
4508    for (uint32_t i = 0; i < submission->image_bind_count; ++i)
4509       size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);
4510 
4511    size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
4512    size += temporary_count * sizeof(struct radv_semaphore_part);
4513    size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
4514    size += submission->wait_value_count * sizeof(uint64_t);
4515    size += submission->signal_value_count * sizeof(uint64_t);
4516    size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);
4517 
4518    deferred = calloc(1, size);
4519    if (!deferred)
4520       return VK_ERROR_OUT_OF_HOST_MEMORY;
4521 
4522    deferred->queue = queue;
4523 
4524    deferred->cmd_buffers = (void *)(deferred + 1);
4525    deferred->cmd_buffer_count = submission->cmd_buffer_count;
4526    if (submission->cmd_buffer_count) {
4527       memcpy(deferred->cmd_buffers, submission->cmd_buffers,
4528              submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
4529    }
4530 
4531    deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);
4532    deferred->buffer_bind_count = submission->buffer_bind_count;
4533    if (submission->buffer_bind_count) {
4534       memcpy(deferred->buffer_binds, submission->buffer_binds,
4535              submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
4536    }
4537 
4538    deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);
4539    deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
4540    if (submission->image_opaque_bind_count) {
4541       memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
4542              submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
4543    }
4544 
4545    deferred->image_binds =
4546       (void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
4547    deferred->image_bind_count = submission->image_bind_count;
4548 
4549    VkSparseImageMemoryBind *sparse_image_binds =
4550       (void *)(deferred->image_binds + deferred->image_bind_count);
4551    for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
4552       deferred->image_binds[i] = submission->image_binds[i];
4553       deferred->image_binds[i].pBinds = sparse_image_binds;
4554 
4555       for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
4556          *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
4557    }
4558 
4559    deferred->flush_caches = submission->flush_caches;
4560    deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;
4561 
4562    deferred->wait_semaphores = (void *)sparse_image_binds;
4563    deferred->wait_semaphore_count = submission->wait_semaphore_count;
4564 
4565    deferred->signal_semaphores =
4566       (void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);
4567    deferred->signal_semaphore_count = submission->signal_semaphore_count;
4568 
4569    deferred->fence = submission->fence;
4570 
4571    deferred->temporary_semaphore_parts =
4572       (void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);
4573    deferred->temporary_semaphore_part_count = temporary_count;
4574 
4575    uint32_t temporary_idx = 0;
4576    for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4577       RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
4578       if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4579          deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
4580          deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
4581          semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
4582          ++temporary_idx;
4583       } else
4584          deferred->wait_semaphores[i] = &semaphore->permanent;
4585    }
4586 
4587    for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
4588       RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
4589       if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
4590          deferred->signal_semaphores[i] = &semaphore->temporary;
4591       } else {
4592          deferred->signal_semaphores[i] = &semaphore->permanent;
4593       }
4594    }
4595 
4596    deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);
4597    if (submission->wait_value_count) {
4598       memcpy(deferred->wait_values, submission->wait_values,
4599              submission->wait_value_count * sizeof(uint64_t));
4600    }
4601    deferred->signal_values = deferred->wait_values + submission->wait_value_count;
4602    if (submission->signal_value_count) {
4603       memcpy(deferred->signal_values, submission->signal_values,
4604              submission->signal_value_count * sizeof(uint64_t));
4605    }
4606 
4607    deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);
4608    /* This is the worst case. radv_queue_enqueue_submission will lower it further, but this
4609     * ensures the submission is not accidentally triggered early when adding wait timelines. */
4610    deferred->submission_wait_count = 1 + submission->wait_semaphore_count;
4611 
4612    *out = deferred;
4613    return VK_SUCCESS;
4614 }
4615 
4616 static VkResult
4617 radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
4618                               struct list_head *processing_list)
4619 {
4620    uint32_t wait_cnt = 0;
4621    struct radv_timeline_waiter *waiter = submission->wait_nodes;
4622    for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
4623       if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
4624          mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
4625          if (submission->wait_semaphores[i]->timeline.highest_submitted <
4626              submission->wait_values[i]) {
4627             ++wait_cnt;
4628             waiter->value = submission->wait_values[i];
4629             waiter->submission = submission;
4630             list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
4631             ++waiter;
4632          }
4633          mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
4634       }
4635    }
4636 
4637    mtx_lock(&submission->queue->pending_mutex);
4638 
4639    bool is_first = list_is_empty(&submission->queue->pending_submissions);
4640    list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);
4641 
4642    mtx_unlock(&submission->queue->pending_mutex);
4643 
4644    /* If there is already a submission in the queue, that submission will decrement the
4645     * counter by 1 when it is submitted; if the queue was empty, we decrement ourselves,
4646     * since there is no previous submission to do it. */
4647    uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);
4648 
4649    /* if decrement is zero, then we don't have a refcounted reference to the
4650     * submission anymore, so it is not safe to access the submission. */
4651    if (!decrement)
4652       return VK_SUCCESS;
4653 
4654    return radv_queue_trigger_submission(submission, decrement, processing_list);
4655 }
4656 
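/* Runs after a deferred submission has been handed to the kernel: unlink it
 * from the queue's pending list, kick the next pending submission (if any),
 * and wake anyone blocked in a timeline wait. */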
4657 static void
4658 radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
4659                                    struct list_head *processing_list)
4660 {
4661    mtx_lock(&submission->queue->pending_mutex);
4662    list_del(&submission->queue_pending_list);
4663 
4664    /* trigger the next submission in the queue. */
4665    if (!list_is_empty(&submission->queue->pending_submissions)) {
4666       struct radv_deferred_queue_submission *next_submission =
4667          list_first_entry(&submission->queue->pending_submissions,
4668                           struct radv_deferred_queue_submission, queue_pending_list);
4669       radv_queue_trigger_submission(next_submission, 1, processing_list);
4670    }
4671    mtx_unlock(&submission->queue->pending_mutex);
4672 
4673    u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
4674 }
4675 
4676 static VkResult
4677 radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
4678                            struct list_head *processing_list)
4679 {
4680    struct radv_queue *queue = submission->queue;
4681    struct radeon_winsys_ctx *ctx = queue->hw_ctx;
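   /* When tracing is enabled (trace_bo is set), submit one IB at a time so
    * that a GPU hang can be attributed to a single command buffer (see
    * radv_check_gpu_hangs() below). */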
4682    uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
4683    bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
4684    bool can_patch = true;
4685    uint32_t advance;
4686    struct radv_winsys_sem_info sem_info = {0};
4687    VkResult result;
4688    struct radeon_cmdbuf *initial_preamble_cs = NULL;
4689    struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
4690    struct radeon_cmdbuf *continue_preamble_cs = NULL;
4691 
4692    result =
4693       radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
4694                          &initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);
4695    if (result != VK_SUCCESS)
4696       goto fail;
4697 
4698    result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,
4699                                 submission->wait_semaphores, submission->wait_values,
4700                                 submission->signal_semaphore_count, submission->signal_semaphores,
4701                                 submission->signal_values, submission->fence);
4702    if (result != VK_SUCCESS)
4703       goto fail;
4704 
4705    for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
4706       result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
4707       if (result != VK_SUCCESS)
4708          goto fail;
4709    }
4710 
4711    for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
4712       result =
4713          radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
4714       if (result != VK_SUCCESS)
4715          goto fail;
4716    }
4717 
4718    for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
4719       result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
4720       if (result != VK_SUCCESS)
4721          goto fail;
4722    }
4723 
4724    if (!submission->cmd_buffer_count) {
4725       result = queue->device->ws->cs_submit(ctx, queue->vk.index_in_family,
4726                                             &queue->device->empty_cs[queue->vk.queue_family_index], 1,
4727                                             NULL, NULL, &sem_info, false);
4728       if (result != VK_SUCCESS)
4729          goto fail;
4730    } else {
4731       struct radeon_cmdbuf **cs_array =
4732          malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));
            if (!cs_array) {
               result = VK_ERROR_OUT_OF_HOST_MEMORY;
               goto fail;
            }

4734       for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
4735          RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
4736          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
4737 
4738          cs_array[j] = cmd_buffer->cs;
4739          if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
4740             can_patch = false;
4741 
4742          cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
4743       }
4744 
4745       for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
4746          struct radeon_cmdbuf *initial_preamble =
4747             (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
4748          advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);
4749 
4750          if (queue->device->trace_bo)
4751             *queue->device->trace_id_ptr = 0;
4752 
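         /* Semaphore waits apply only to the first chunk of a split
          * submission, and signals only to the last one. */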
4753          sem_info.cs_emit_wait = j == 0;
4754          sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;
4755 
4756          result = queue->device->ws->cs_submit(ctx, queue->vk.index_in_family, cs_array + j, advance,
4757                                                initial_preamble, continue_preamble_cs, &sem_info,
4758                                                can_patch);
4759          if (result != VK_SUCCESS) {
4760             free(cs_array);
4761             goto fail;
4762          }
4763 
4764          if (queue->device->trace_bo) {
4765             radv_check_gpu_hangs(queue, cs_array[j]);
4766          }
4767 
4768          if (queue->device->tma_bo) {
4769             radv_check_trap_handler(queue);
4770          }
4771       }
4772 
4773       free(cs_array);
4774    }
4775 
4776    radv_finalize_timelines(queue->device, submission->wait_semaphore_count,
4777                            submission->wait_semaphores, submission->wait_values,
4778                            submission->signal_semaphore_count, submission->signal_semaphores,
4779                            submission->signal_values, processing_list);
4780    /* Has to happen after timeline finalization to make sure the
4781     * condition variable is only triggered when timelines and queue have
4782     * been updated. */
4783    radv_queue_submission_update_queue(submission, processing_list);
4784 
4785 fail:
4786    if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
4787       /* When something bad happened during the submission, such as
4788        * an out of memory issue, it might be hard to recover from
4789        * this inconsistent state. To avoid this sort of problem, we
4790        * assume that we are in a really bad situation and return
4791        * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
4792        * to submit the same job again to this device.
4793        */
4794       result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
4795    }
4796 
4797    radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,
4798                            submission->temporary_semaphore_parts);
4799    radv_free_sem_info(&sem_info);
4800    free(submission);
4801    return result;
4802 }
4803 
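/* Drain the processing list: each deferred submission is submitted in turn,
 * and finalizing one submission may append ready successors to the list.
 * Processing stops at the first failure.
 */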
static VkResult
radv_process_submissions(struct list_head *processing_list)
{
   while (!list_is_empty(processing_list)) {
      struct radv_deferred_queue_submission *submission =
         list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
      list_del(&submission->processing_list);

      VkResult result = radv_queue_submit_deferred(submission, processing_list);
      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

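/* Wait until every timeline-syncobj wait of the submission is satisfiable,
 * i.e. a signal operation for the awaited point has at least been submitted.
 * Semaphores whose max_point already covers the wait value are skipped.
 * Returns VK_TIMEOUT if the remaining waits did not become available within
 * the given absolute timeout.
 */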
static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
                                        uint64_t timeout)
{
   struct radv_device *device = submission->queue->device;
   uint32_t syncobj_count = 0;
   uint32_t syncobj_idx = 0;

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;
      ++syncobj_count;
   }

   if (!syncobj_count)
      return VK_SUCCESS;

   uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
   if (!points)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t *syncobj = (uint32_t *)(points + syncobj_count);

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;

      syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
      points[syncobj_idx] = submission->wait_values[i];
      ++syncobj_idx;
   }

   bool success = true;
   if (syncobj_idx > 0) {
      success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
                                                  true, timeout);
   }

   free(points);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

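/* Body of the per-queue submission thread. It sleeps on thread_cond until a
 * deferred submission is handed over via queue->thread_submission, waits for
 * that submission's timeline waits to become available, and then processes it
 * (plus any successors) through radv_process_submissions().
 */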
static int
radv_queue_submission_thread_run(void *q)
{
   struct radv_queue *queue = q;

   mtx_lock(&queue->thread_mutex);
   while (!p_atomic_read(&queue->thread_exit)) {
      struct radv_deferred_queue_submission *submission = queue->thread_submission;
      struct list_head processing_list;
      VkResult result = VK_SUCCESS;
      if (!submission) {
         u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
         continue;
      }
      mtx_unlock(&queue->thread_mutex);

      /* Wait at most 5 seconds so we have a chance to notice shutdown when
       * a semaphore never gets signaled. If it takes longer we just retry
       * the wait next iteration. */
      result =
         wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));
      if (result != VK_SUCCESS) {
         mtx_lock(&queue->thread_mutex);
         continue;
      }

      /* The lock isn't held, but nobody will set a new thread_submission
       * until we finish processing the current one. */
      p_atomic_set(&queue->thread_submission, NULL);

      list_inithead(&processing_list);
      list_addtail(&submission->processing_list, &processing_list);
      result = radv_process_submissions(&processing_list);

      mtx_lock(&queue->thread_mutex);
   }
   mtx_unlock(&queue->thread_mutex);
   return 0;
}

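/* Decrement the submission's wait count and, once it reaches zero, either
 * queue it for direct processing (if all timeline waits are already
 * available) or hand it to the per-queue submission thread, which is started
 * lazily on first use.
 */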
static VkResult
radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,
                              struct list_head *processing_list)
{
   struct radv_queue *queue = submission->queue;
   int ret;
   if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
      return VK_SUCCESS;

   if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==
       VK_SUCCESS) {
      list_addtail(&submission->processing_list, processing_list);
      return VK_SUCCESS;
   }

   mtx_lock(&queue->thread_mutex);

   /* A submission can only be ready for the thread if it doesn't have
    * any predecessors in the same queue, so there can only be one such
    * submission at a time. */
   assert(queue->thread_submission == NULL);

   /* Only start the thread on demand to save resources for the many games
    * which only use binary semaphores. */
   if (!queue->thread_running) {
      ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);
      if (ret) {
         mtx_unlock(&queue->thread_mutex);
         return vk_errorf(queue, VK_ERROR_DEVICE_LOST, "Failed to start submission thread");
      }
      queue->thread_running = true;
   }

   queue->thread_submission = submission;
   mtx_unlock(&queue->thread_mutex);

   u_cnd_monotonic_signal(&queue->thread_cond);
   return VK_SUCCESS;
}

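/* Common submit path: wrap the submission in a deferred object, enqueue it
 * behind any predecessors on the queue, and synchronously process whatever
 * is already ready.
 */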
static VkResult
radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)
{
   struct radv_deferred_queue_submission *deferred = NULL;

   VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
   if (result != VK_SUCCESS)
      return result;

   struct list_head processing_list;
   list_inithead(&processing_list);

   result = radv_queue_enqueue_submission(deferred, &processing_list);
   if (result != VK_SUCCESS) {
      /* If anything is in the list we leak. */
      assert(list_is_empty(&processing_list));
      return result;
   }
   return radv_process_submissions(&processing_list);
}

bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   struct radv_winsys_sem_info sem_info = {0};
   VkResult result;

   result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
   if (result != VK_SUCCESS)
      return false;

   result =
      queue->device->ws->cs_submit(ctx, queue->vk.index_in_family, &cs, 1,
                                   NULL, NULL, &sem_info, false);
   radv_free_sem_info(&sem_info);
   if (result != VK_SUCCESS)
      return false;

   return true;
}

/* Signals the fence as soon as all work currently submitted to the queue is done. */
static VkResult
radv_signal_fence(struct radv_queue *queue, VkFence fence)
{
   return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});
}

static bool
radv_submit_has_effects(const VkSubmitInfo *info)
{
   return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;
}

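/* For reference, a sketch of the application-side pattern this entry point
 * serves (not driver code; `queue`, `cmd_buf` and `timeline_sem` are
 * hypothetical handles): a submit that waits on and signals a timeline
 * semaphore through a chained VkTimelineSemaphoreSubmitInfo, which is the
 * structure radv_QueueSubmit looks up below.
 *
 *    uint64_t wait_value = 1, signal_value = 2;
 *    VkTimelineSemaphoreSubmitInfo timeline_info = {
 *       .sType = VK_STRUCTURE_TYPE_TIMELINE_SEMAPHORE_SUBMIT_INFO,
 *       .waitSemaphoreValueCount = 1,
 *       .pWaitSemaphoreValues = &wait_value,
 *       .signalSemaphoreValueCount = 1,
 *       .pSignalSemaphoreValues = &signal_value,
 *    };
 *    VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *    VkSubmitInfo submit = {
 *       .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO,
 *       .pNext = &timeline_info,
 *       .waitSemaphoreCount = 1,
 *       .pWaitSemaphores = &timeline_sem,
 *       .pWaitDstStageMask = &wait_stage,
 *       .commandBufferCount = 1,
 *       .pCommandBuffers = &cmd_buf,
 *       .signalSemaphoreCount = 1,
 *       .pSignalSemaphores = &timeline_sem,
 *    };
 *    vkQueueSubmit(queue, 1, &submit, VK_NULL_HANDLE);
 */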
VkResult
radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   VkResult result;
   uint32_t fence_idx = 0;
   bool flushed_caches = false;

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   if (fence != VK_NULL_HANDLE) {
      for (uint32_t i = 0; i < submitCount; ++i)
         if (radv_submit_has_effects(pSubmits + i))
            fence_idx = i;
   } else
      fence_idx = UINT32_MAX;

   for (uint32_t i = 0; i < submitCount; i++) {
      if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
         continue;

      VkPipelineStageFlags wait_dst_stage_mask = 0;
      for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
         wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
      }

      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

      result = radv_queue_submit(
         queue, &(struct radv_queue_submission){
                   .cmd_buffers = pSubmits[i].pCommandBuffers,
                   .cmd_buffer_count = pSubmits[i].commandBufferCount,
                   .wait_dst_stage_mask = wait_dst_stage_mask,
                   .flush_caches = !flushed_caches,
                   .wait_semaphores = pSubmits[i].pWaitSemaphores,
                   .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
                   .signal_semaphores = pSubmits[i].pSignalSemaphores,
                   .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
                   .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
                   .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
                   .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
                                          ? timeline_info->waitSemaphoreValueCount
                                          : 0,
                   .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
                   .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
                                            ? timeline_info->signalSemaphoreValueCount
                                            : 0,
                });
      if (result != VK_SUCCESS)
         return result;

      flushed_caches = true;
   }

   if (fence != VK_NULL_HANDLE && !submitCount) {
      result = radv_signal_fence(queue, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static const char *
radv_get_queue_family_name(struct radv_queue *queue)
{
   switch (queue->vk.queue_family_index) {
   case RADV_QUEUE_GENERAL:
      return "graphics";
   case RADV_QUEUE_COMPUTE:
      return "compute";
   case RADV_QUEUE_TRANSFER:
      return "transfer";
   default:
      unreachable("Unknown queue family");
   }
}

VkResult
radv_QueueWaitIdle(VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   mtx_lock(&queue->pending_mutex);
   while (!list_is_empty(&queue->pending_submissions)) {
      u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
   }
   mtx_unlock(&queue->pending_mutex);

   if (!queue->device->ws->ctx_wait_idle(
          queue->hw_ctx, radv_queue_family_to_ring(queue->vk.queue_family_index),
          queue->vk.index_in_family)) {
      return radv_device_set_lost(queue->device,
                                  "Failed to wait for a '%s' queue "
                                  "to be idle. GPU hang?",
                                  radv_get_queue_family_name(queue));
   }

   return VK_SUCCESS;
}

VkResult
radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
                                          VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
                                                     pPropertyCount, pProperties);
}

PFN_vkVoidFunction
radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table that
    * specifies exactly when we must return a valid function pointer, when
    * we must return NULL, and when the behavior is left undefined. See the
    * table for the exact details.
    */
   if (pName == NULL)
      return NULL;

#define LOOKUP_RADV_ENTRYPOINT(entrypoint)                                                         \
   if (strcmp(pName, "vk" #entrypoint) == 0)                                                       \
   return (PFN_vkVoidFunction)radv_##entrypoint

   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
   LOOKUP_RADV_ENTRYPOINT(CreateInstance);

   /* GetInstanceProcAddr() can also be called with a NULL instance.
    * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
    */
   LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);

#undef LOOKUP_RADV_ENTRYPOINT

   if (instance == NULL)
      return NULL;

   return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
}

/* Windows will use a dll definition file to avoid build errors. */
#ifdef _WIN32
#undef PUBLIC
#define PUBLIC
#endif

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return radv_GetInstanceProcAddr(instance, pName);
}

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

bool
radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
{
   /* Only set BO metadata for the first plane */
   if (memory->image && memory->image->offset == 0) {
      struct radeon_bo_metadata metadata;
      radv_init_metadata(device, memory->image, &metadata);
      device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
   }

   return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
}

void
radv_device_memory_init(struct radv_device_memory *mem, struct radv_device *device,
                        struct radeon_winsys_bo *bo)
{
   memset(mem, 0, sizeof(*mem));
   vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);

   mem->bo = bo;
}

void
radv_device_memory_finish(struct radv_device_memory *mem)
{
   vk_object_base_finish(&mem->base);
}

void
radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                 struct radv_device_memory *mem)
{
   if (mem == NULL)
      return;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   if (mem->android_hardware_buffer)
      AHardwareBuffer_release(mem->android_hardware_buffer);
#endif

   if (mem->bo) {
      if (device->overallocation_disallowed) {
         mtx_lock(&device->overallocation_mutex);
         device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      if (device->use_global_bo_list)
         device->ws->buffer_make_resident(device->ws, mem->bo, false);
      device->ws->buffer_destroy(device->ws, mem->bo);
      mem->bo = NULL;
   }

   radv_device_memory_finish(mem);
   vk_free2(&device->vk.alloc, pAllocator, mem);
}

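/* Allocate a VkDeviceMemory object. Depending on the pNext chain this either
 * imports an Android hardware buffer, imports an opaque/dma-buf fd, wraps a
 * host pointer, or creates a fresh buffer object in the heap selected by
 * memoryTypeIndex (with overallocation accounting when required).
 */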
static VkResult
radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
   struct radv_device_memory *mem;
   VkResult result;
   enum radeon_bo_domain domain;
   uint32_t flags = 0;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   const VkImportMemoryFdInfoKHR *import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   const VkExportMemoryAllocateInfo *export_info =
      vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
   const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
   const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

   if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
       !(export_info && (export_info->handleTypes &
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   mem =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   radv_device_memory_init(mem, device, NULL);

   if (wsi_info) {
      if (wsi_info->implicit_sync)
         flags |= RADEON_FLAG_IMPLICIT_SYNC;

      /* With prime, a linear buffer is allocated in the default heap, which
       * is VRAM. Because of that, when the display is connected to the iGPU
       * while rendering happens on the dGPU, the DDX function
       * amdgpu_present_check_flip() fails and a blit is used instead of a
       * flip. Setting RADEON_FLAG_GTT_WC lets the kernel allocate GTT memory
       * on hardware that can scan out from GTT directly. The wsi_info check
       * ensures the flag is only set for memory allocated by the driver.
       */
      flags |= RADEON_FLAG_GTT_WC;
   }

   if (dedicate_info) {
      mem->image = radv_image_from_handle(dedicate_info->image);
      mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
   } else {
      mem->image = NULL;
      mem->buffer = NULL;
   }

   float priority_float = 0.5;
   const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
   if (priority_ext)
      priority_float = priority_ext->priority;

   uint64_t replay_address = 0;
   const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
   if (replay_info && replay_info->opaqueCaptureAddress)
      replay_address = replay_info->opaqueCaptureAddress;

   unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
                            (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));

   mem->user_ptr = NULL;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   mem->android_hardware_buffer = NULL;
#endif

   if (ahb_import_info) {
      result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (export_info && (export_info->handleTypes &
                              VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
      result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (import_info) {
      assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
      result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);
      if (result != VK_SUCCESS) {
         goto fail;
      } else {
         close(import_info->fd);
      }

      if (mem->image && mem->image->plane_count == 1 &&
          !vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&
          mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
         struct radeon_bo_metadata metadata;
         device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);

         struct radv_image_create_info create_info = {.no_metadata_planes = true,
                                                      .bo_metadata = &metadata};

         /* This gives a basic ability to import radeonsi images
          * that don't have DCC. This is not guaranteed by any
          * spec and can be removed after we support modifiers. */
         result = radv_image_create_layout(device, create_info, NULL, mem->image);
         if (result != VK_SUCCESS) {
            device->ws->buffer_destroy(device->ws, mem->bo);
            goto fail;
         }
      }
   } else if (host_ptr_info) {
      assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
      result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
                                           pAllocateInfo->allocationSize, priority, &mem->bo);
      if (result != VK_SUCCESS) {
         goto fail;
      } else {
         mem->user_ptr = host_ptr_info->pHostPointer;
      }
   } else {
      uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
      uint32_t heap_index;

      heap_index =
         device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
            .heapIndex;
      domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
      flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];

      if (!import_info && (!export_info || !export_info->handleTypes)) {
         flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
         if (device->use_global_bo_list) {
            flags |= RADEON_FLAG_PREFER_LOCAL_BO;
         }
      }

      const VkMemoryAllocateFlagsInfo *flags_info =
         vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
      if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
         flags |= RADEON_FLAG_REPLAYABLE;

      if (device->overallocation_disallowed) {
         uint64_t total_size =
            device->physical_device->memory_properties.memoryHeaps[heap_index].size;

         mtx_lock(&device->overallocation_mutex);
         if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
            mtx_unlock(&device->overallocation_mutex);
            result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
            goto fail;
         }
         device->allocated_memory_size[heap_index] += alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      result = device->ws->buffer_create(device->ws, alloc_size,
                                         device->physical_device->rad_info.max_alignment, domain,
                                         flags, priority, replay_address, &mem->bo);

      if (result != VK_SUCCESS) {
         if (device->overallocation_disallowed) {
            mtx_lock(&device->overallocation_mutex);
            device->allocated_memory_size[heap_index] -= alloc_size;
            mtx_unlock(&device->overallocation_mutex);
         }
         goto fail;
      }

      mem->heap_index = heap_index;
      mem->alloc_size = alloc_size;
   }

   if (!wsi_info) {
      if (device->use_global_bo_list) {
         result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   *pMem = radv_device_memory_to_handle(mem);

   return VK_SUCCESS;

fail:
   radv_free_memory(device, pAllocator, mem);

   return result;
}

VkResult
radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
                    const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}

void
radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

   radv_free_memory(device, pAllocator, mem);
}

VkResult
radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
               VkMemoryMapFlags flags, void **ppData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->user_ptr)
      *ppData = mem->user_ptr;
   else
      *ppData = device->ws->buffer_map(mem->bo);

   if (*ppData) {
      *ppData = (uint8_t *)*ppData + offset;
      return VK_SUCCESS;
   }

   return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);
}

void
radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL)
      return;

   if (mem->user_ptr == NULL)
      device->ws->buffer_unmap(mem->bo);
}

/* All host-visible memory types radv exposes are also host-coherent, so
 * flushing and invalidating mapped ranges are no-ops. */
VkResult
radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                             const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                                  const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

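/* Buffer memory requirements are uniform: any memory type works, the size is
 * the create size rounded up to the alignment, and the alignment is 4096 for
 * sparse buffers and 16 otherwise. Buffers never require a dedicated
 * allocation.
 */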
static void
radv_get_buffer_memory_requirements(struct radv_device *device,
                                    VkDeviceSize size,
                                    VkBufferCreateFlags flags,
                                    VkMemoryRequirements2 *pMemoryRequirements)
{
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

   if (flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
      pMemoryRequirements->memoryRequirements.alignment = 4096;
   else
      pMemoryRequirements->memoryRequirements.alignment = 16;

   pMemoryRequirements->memoryRequirements.size =
      align64(size, pMemoryRequirements->memoryRequirements.alignment);

   vk_foreach_struct(ext, pMemoryRequirements->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
         req->requiresDedicatedAllocation = false;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

void
radv_GetBufferMemoryRequirements2(VkDevice _device, const VkBufferMemoryRequirementsInfo2 *pInfo,
                                  VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);

   radv_get_buffer_memory_requirements(device, buffer->size, buffer->flags, pMemoryRequirements);
}

void
radv_GetDeviceBufferMemoryRequirementsKHR(VkDevice _device,
                                          const VkDeviceBufferMemoryRequirementsKHR *pInfo,
                                          VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   radv_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pInfo->pCreateInfo->flags,
                                       pMemoryRequirements);
}

void
radv_GetImageMemoryRequirements2(VkDevice _device, const VkImageMemoryRequirementsInfo2 *pInfo,
                                 VkMemoryRequirements2 *pMemoryRequirements)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, pInfo->image);

   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1u << device->physical_device->memory_properties.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.size = image->size;
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;

   vk_foreach_struct(ext, pMemoryRequirements->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
         req->requiresDedicatedAllocation =
            image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

void
radv_GetDeviceImageMemoryRequirementsKHR(VkDevice device,
                                         const VkDeviceImageMemoryRequirementsKHR *pInfo,
                                         VkMemoryRequirements2 *pMemoryRequirements)
{
   UNUSED VkResult result;
   VkImage image;

   /* Determining the image size/alignment requires creating a surface, which
    * is complicated without creating an image.
    * TODO: Avoid creating an image.
    */
   result = radv_CreateImage(device, pInfo->pCreateInfo, NULL, &image);
   assert(result == VK_SUCCESS);

   VkImageMemoryRequirementsInfo2 info2 = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
      .image = image,
   };

   radv_GetImageMemoryRequirements2(device, &info2, pMemoryRequirements);

   radv_DestroyImage(device, image, NULL);
}

void
radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
                               VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult
radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
                       const VkBindBufferMemoryInfo *pBindInfos)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);

      if (mem) {
         if (mem->alloc_size) {
            VkBufferMemoryRequirementsInfo2 info = {
               .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_REQUIREMENTS_INFO_2,
               .buffer = pBindInfos[i].buffer,
            };
            VkMemoryRequirements2 reqs = {
               .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            };

            radv_GetBufferMemoryRequirements2(_device, &info, &reqs);

            if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
               return vk_errorf(device, VK_ERROR_UNKNOWN,
                                "Device memory object too small for the buffer.\n");
            }
         }

         buffer->bo = mem->bo;
         buffer->offset = pBindInfos[i].memoryOffset;
      } else {
         buffer->bo = NULL;
      }
   }
   return VK_SUCCESS;
}

VkResult
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
                      const VkBindImageMemoryInfo *pBindInfos)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
      RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);

      if (mem) {
         if (mem->alloc_size) {
            VkImageMemoryRequirementsInfo2 info = {
               .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_REQUIREMENTS_INFO_2,
               .image = pBindInfos[i].image,
            };
            VkMemoryRequirements2 reqs = {
               .sType = VK_STRUCTURE_TYPE_MEMORY_REQUIREMENTS_2,
            };

            radv_GetImageMemoryRequirements2(_device, &info, &reqs);

            if (pBindInfos[i].memoryOffset + reqs.memoryRequirements.size > mem->alloc_size) {
               return vk_errorf(device, VK_ERROR_UNKNOWN,
                                "Device memory object too small for the image.\n");
            }
         }

         image->bo = mem->bo;
         image->offset = pBindInfos[i].memoryOffset;
      } else {
         image->bo = NULL;
         image->offset = 0;
      }
   }
   return VK_SUCCESS;
}

static bool
radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
{
   return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||
          info->waitSemaphoreCount || info->signalSemaphoreCount;
}

VkResult
radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,
                     VkFence fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   uint32_t fence_idx = 0;

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   if (fence != VK_NULL_HANDLE) {
      for (uint32_t i = 0; i < bindInfoCount; ++i)
         if (radv_sparse_bind_has_effects(pBindInfo + i))
            fence_idx = i;
   } else
      fence_idx = UINT32_MAX;

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
         continue;

      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

      VkResult result = radv_queue_submit(
         queue, &(struct radv_queue_submission){
                   .buffer_binds = pBindInfo[i].pBufferBinds,
                   .buffer_bind_count = pBindInfo[i].bufferBindCount,
                   .image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
                   .image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
                   .image_binds = pBindInfo[i].pImageBinds,
                   .image_bind_count = pBindInfo[i].imageBindCount,
                   .wait_semaphores = pBindInfo[i].pWaitSemaphores,
                   .wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
                   .signal_semaphores = pBindInfo[i].pSignalSemaphores,
                   .signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
                   .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
                   .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
                   .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
                                          ? timeline_info->waitSemaphoreValueCount
                                          : 0,
                   .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
                   .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
                                            ? timeline_info->signalSemaphoreValueCount
                                            : 0,
                });

      if (result != VK_SUCCESS)
         return result;
   }

   if (fence != VK_NULL_HANDLE && !bindInfoCount) {
      VkResult result = radv_signal_fence(queue, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static void
radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)
{
   if (part->kind != RADV_FENCE_NONE)
      device->ws->destroy_syncobj(device->ws, part->syncobj);
   part->kind = RADV_FENCE_NONE;
}

static void
radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_fence *fence)
{
   radv_destroy_fence_part(device, &fence->temporary);
   radv_destroy_fence_part(device, &fence->permanent);

   vk_object_base_finish(&fence->base);
   vk_free2(&device->vk.alloc, pAllocator, fence);
}

VkResult
radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkFence *pFence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool create_signaled = false;
   struct radv_fence *fence;
   int ret;

   fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fence)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);

   fence->permanent.kind = RADV_FENCE_SYNCOBJ;

   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      create_signaled = true;

   ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);
   if (ret) {
      radv_destroy_fence(device, pAllocator, fence);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pFence = radv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void
radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   if (!fence)
      return;

   radv_destroy_fence(device, pAllocator, fence);
}

VkResult
radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,
                   uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t *handles;

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   timeout = radv_get_absolute_timeout(timeout);

   handles = malloc(sizeof(uint32_t) * fenceCount);
   if (!handles)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

      assert(part->kind == RADV_FENCE_SYNCOBJ);
      handles[i] = part->syncobj;
   }

   bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
   free(handles);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

VkResult
radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (unsigned i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      /* From the Vulkan 1.0.53 spec:
       *
       *    "If any member of pFences currently has its payload
       *    imported with temporary permanence, that fence’s prior
       *    permanent payload is first restored. The remaining
       *    operations described therefore operate on the restored
       *    payload."
       */
      if (fence->temporary.kind != RADV_FENCE_NONE)
         radv_destroy_fence_part(device, &fence->temporary);

      device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
   }

   return VK_SUCCESS;
}

VkResult
radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);
   return success ? VK_SUCCESS : VK_NOT_READY;
}

// Queue semaphore functions

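/* Timeline semaphores come in two flavors here: RADV_SEMAPHORE_TIMELINE_SYNCOBJ
 * maps directly onto a kernel timeline syncobj, while RADV_SEMAPHORE_TIMELINE
 * emulates a timeline on top of binary syncobjs with a mutex-protected, sorted
 * list of points (one syncobj per value), a free list for recycling, and a
 * waiter list used to kick deferred submissions.
 */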
static void
radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
{
   timeline->highest_signaled = value;
   timeline->highest_submitted = value;
   list_inithead(&timeline->points);
   list_inithead(&timeline->free_points);
   list_inithead(&timeline->waiters);
   mtx_init(&timeline->mutex, mtx_plain);
}

static void
radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)
   {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
   {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   mtx_destroy(&timeline->mutex);
}

static void
radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->wait_count || point->value > timeline->highest_submitted)
         return;

      if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
         timeline->highest_signaled = point->value;
         list_del(&point->list);
         list_add(&point->list, &timeline->free_points);
      }
   }
}

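/* Find the first point with value >= p, taking a wait reference on it.
 * Returns NULL when the timeline has already signaled past p (nothing to
 * wait on) or when no such point exists yet. Caller must hold the timeline
 * mutex.
 */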
static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                         uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->value >= p) {
         ++point->wait_count;
         return point;
      }
   }
   return NULL;
}

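/* Insert a new point for value p, keeping the point list sorted by value.
 * Syncobjs are recycled from the free list when possible. Returns NULL if
 * the value was already signaled or a point for it already exists. The
 * returned point starts with wait_count == 1. Caller must hold the timeline
 * mutex.
 */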
static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,
                               uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   struct radv_timeline_point *ret = NULL;
   struct radv_timeline_point *prev = NULL;
   int r;

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->value == p) {
         return NULL;
      }

      if (point->value < p)
         prev = point;
   }

   if (list_is_empty(&timeline->free_points)) {
      ret = malloc(sizeof(struct radv_timeline_point));
      if (!ret)
         return NULL;
      r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
      if (r) {
         free(ret);
         return NULL;
      }
   } else {
      ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
      list_del(&ret->list);

      device->ws->reset_syncobj(device->ws, ret->syncobj);
   }

   ret->value = p;
   ret->wait_count = 1;

   if (prev) {
      list_add(&ret->list, &prev->list);
   } else {
      list_addtail(&ret->list, &timeline->points);
   }
   return ret;
}

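/* Wait until the emulated timeline reaches `value`: first block on the
 * device-wide condition variable until a submission for the value exists,
 * then wait on the syncobj of the corresponding point. Returns VK_TIMEOUT
 * if the absolute timeout expires in either phase.
 */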
6010 static VkResult
radv_timeline_wait(struct radv_device * device,struct radv_timeline * timeline,uint64_t value,uint64_t abs_timeout)6011 radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,
6012                    uint64_t abs_timeout)
6013 {
6014    mtx_lock(&timeline->mutex);
6015 
6016    while (timeline->highest_submitted < value) {
6017       struct timespec abstime;
6018       timespec_from_nsec(&abstime, abs_timeout);
6019 
6020       u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);
6021 
6022       if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
6023          mtx_unlock(&timeline->mutex);
6024          return VK_TIMEOUT;
6025       }
6026    }
6027 
6028    struct radv_timeline_point *point =
6029       radv_timeline_find_point_at_least_locked(device, timeline, value);
6030    mtx_unlock(&timeline->mutex);
6031    if (!point)
6032       return VK_SUCCESS;
6033 
6034    bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);
6035 
6036    mtx_lock(&timeline->mutex);
6037    point->wait_count--;
6038    mtx_unlock(&timeline->mutex);
6039    return success ? VK_SUCCESS : VK_TIMEOUT;
6040 }
6041 
6042 static void
radv_timeline_trigger_waiters_locked(struct radv_timeline * timeline,struct list_head * processing_list)6043 radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
6044                                      struct list_head *processing_list)
6045 {
6046    list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)
6047    {
6048       if (waiter->value > timeline->highest_submitted)
6049          continue;
6050 
6051       radv_queue_trigger_submission(waiter->submission, 1, processing_list);
6052       list_del(&waiter->list);
6053    }
6054 }
6055 
6056 static void
radv_destroy_semaphore_part(struct radv_device * device,struct radv_semaphore_part * part)6057 radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)
6058 {
6059    switch (part->kind) {
6060    case RADV_SEMAPHORE_NONE:
6061       break;
6062    case RADV_SEMAPHORE_TIMELINE:
6063       radv_destroy_timeline(device, &part->timeline);
6064       break;
6065    case RADV_SEMAPHORE_SYNCOBJ:
6066    case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
6067       device->ws->destroy_syncobj(device->ws, part->syncobj);
6068       break;
6069    }
6070    part->kind = RADV_SEMAPHORE_NONE;
6071 }
6072 
6073 static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void * pNext,uint64_t * initial_value)6074 radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
6075 {
6076    const VkSemaphoreTypeCreateInfo *type_info =
6077       vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);
6078 
6079    if (!type_info)
6080       return VK_SEMAPHORE_TYPE_BINARY;
6081 
6082    if (initial_value)
6083       *initial_value = type_info->initialValue;
6084    return type_info->semaphoreType;
6085 }
6086 
6087 static void
radv_destroy_semaphore(struct radv_device * device,const VkAllocationCallbacks * pAllocator,struct radv_semaphore * sem)6088 radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
6089                        struct radv_semaphore *sem)
6090 {
6091    radv_destroy_semaphore_part(device, &sem->temporary);
6092    radv_destroy_semaphore_part(device, &sem->permanent);
6093    vk_object_base_finish(&sem->base);
6094    vk_free2(&device->vk.alloc, pAllocator, sem);
6095 }
6096 
6097 VkResult
radv_CreateSemaphore(VkDevice _device,const VkSemaphoreCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkSemaphore * pSemaphore)6098 radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,
6099                      const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)
6100 {
6101    RADV_FROM_HANDLE(radv_device, device, _device);
6102    uint64_t initial_value = 0;
6103    VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);
6104 
6105    struct radv_semaphore *sem =
6106       vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6107    if (!sem)
6108       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6109 
6110    vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);
6111 
6112    sem->temporary.kind = RADV_SEMAPHORE_NONE;
6113    sem->permanent.kind = RADV_SEMAPHORE_NONE;
6114 
6115    if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
6116        device->physical_device->rad_info.has_timeline_syncobj) {
6117       int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6118       if (ret) {
6119          radv_destroy_semaphore(device, pAllocator, sem);
6120          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6121       }
6122       device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
6123       sem->permanent.timeline_syncobj.max_point = initial_value;
6124       sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
6125    } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
6126       radv_create_timeline(&sem->permanent.timeline, initial_value);
6127       sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
6128    } else {
6129       int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
6130       if (ret) {
6131          radv_destroy_semaphore(device, pAllocator, sem);
6132          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6133       }
6134       sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
6135    }
6136 
6137    *pSemaphore = radv_semaphore_to_handle(sem);
6138    return VK_SUCCESS;
6139 }
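
/* Illustrative sketch (not driver code): the timeline paths above are taken
 * when the application chains a VkSemaphoreTypeCreateInfo, e.g.:
 *
 *    VkSemaphoreTypeCreateInfo type_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_TYPE_CREATE_INFO,
 *       .semaphoreType = VK_SEMAPHORE_TYPE_TIMELINE,
 *       .initialValue = 0,
 *    };
 *    VkSemaphoreCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO,
 *       .pNext = &type_info,
 *    };
 *    vkCreateSemaphore(device, &info, NULL, &semaphore);
 *
 * Whether a kernel timeline syncobj or the emulated radv_timeline backs the
 * semaphore depends on rad_info.has_timeline_syncobj, as checked above.
 */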
6140 
6141 void
6142 radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,
6143                       const VkAllocationCallbacks *pAllocator)
6144 {
6145    RADV_FROM_HANDLE(radv_device, device, _device);
6146    RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
6147    if (!_semaphore)
6148       return;
6149 
6150    radv_destroy_semaphore(device, pAllocator, sem);
6151 }
6152 
6153 VkResult
6154 radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
6155 {
6156    RADV_FROM_HANDLE(radv_device, device, _device);
6157    RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);
6158 
6159    if (radv_device_is_lost(device))
6160       return VK_ERROR_DEVICE_LOST;
6161 
6162    struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
6163                                          ? &semaphore->temporary
6164                                          : &semaphore->permanent;
6165 
6166    switch (part->kind) {
6167    case RADV_SEMAPHORE_TIMELINE: {
6168       mtx_lock(&part->timeline.mutex);
6169       radv_timeline_gc_locked(device, &part->timeline);
6170       *pValue = part->timeline.highest_signaled;
6171       mtx_unlock(&part->timeline.mutex);
6172       return VK_SUCCESS;
6173    }
6174    case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6175       return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
6176    }
6177    case RADV_SEMAPHORE_NONE:
6178    case RADV_SEMAPHORE_SYNCOBJ:
6179       unreachable("Invalid semaphore type");
6180    }
6181    unreachable("Unhandled semaphore type");
6182 }
6183 
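/* Wait path for emulated (non-syncobj) timeline semaphores. With
 * VK_SEMAPHORE_WAIT_ANY_BIT and more than one semaphore there is no single
 * primitive to wait on, so we busy-poll each timeline with a zero timeout
 * until one is satisfied or the absolute timeout expires; otherwise each
 * semaphore is waited on in turn with the full timeout.
 */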
6184 static VkResult
6185 radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,
6186                     uint64_t abs_timeout)
6187 {
6188    if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
6189       for (;;) {
6190          for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6191             RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6192             VkResult result =
6193                radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);
6194 
6195             if (result == VK_SUCCESS)
6196                return VK_SUCCESS;
6197          }
6198          if (radv_get_current_time() > abs_timeout)
6199             return VK_TIMEOUT;
6200       }
6201    }
6202 
6203    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6204       RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6205       VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
6206                                            pWaitInfo->pValues[i], abs_timeout);
6207 
6208       if (result != VK_SUCCESS)
6209          return result;
6210    }
6211    return VK_SUCCESS;
6212 }
6213 VkResult
6214 radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
6215 {
6216    RADV_FROM_HANDLE(radv_device, device, _device);
6217 
6218    if (radv_device_is_lost(device))
6219       return VK_ERROR_DEVICE_LOST;
6220 
6221    uint64_t abs_timeout = radv_get_absolute_timeout(timeout);
6222 
6223    if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==
6224        RADV_SEMAPHORE_TIMELINE)
6225       return radv_wait_timelines(device, pWaitInfo, abs_timeout);
6226 
6227    if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
6228       return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
6229                        "semaphoreCount integer overflow");
6230 
6231    bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
6232    uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
6233    if (!handles)
6234       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6235 
6236    for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
6237       RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
6238       handles[i] = semaphore->permanent.syncobj;
6239    }
6240 
6241    bool success =
6242       device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
6243                                         pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);
6244    free(handles);
6245    return success ? VK_SUCCESS : VK_TIMEOUT;
6246 }
6247 
6248 VkResult
6249 radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)
6250 {
6251    RADV_FROM_HANDLE(radv_device, device, _device);
6252    RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);
6253 
6254    struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
6255                                          ? &semaphore->temporary
6256                                          : &semaphore->permanent;
6257 
6258    switch (part->kind) {
6259    case RADV_SEMAPHORE_TIMELINE: {
6260       mtx_lock(&part->timeline.mutex);
6261       radv_timeline_gc_locked(device, &part->timeline);
6262       part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
6263       part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);
6264 
6265       struct list_head processing_list;
6266       list_inithead(&processing_list);
6267       radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
6268       mtx_unlock(&part->timeline.mutex);
6269 
6270       VkResult result = radv_process_submissions(&processing_list);
6271 
6272       /* This needs to happen after radv_process_submissions, so
6273        * that any queued submissions that are now unblocked get
6274        * processed before we wake the application. This way we
6275        * ensure that any binary semaphores that are now unblocked
6276        * are usable by the application. */
6277       u_cnd_monotonic_broadcast(&device->timeline_cond);
6278 
6279       return result;
6280    }
6281    case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
6282       part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
6283       device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
6284       break;
6285    }
6286    case RADV_SEMAPHORE_NONE:
6287    case RADV_SEMAPHORE_SYNCOBJ:
6288       unreachable("Invalid semaphore type");
6289    }
6290    return VK_SUCCESS;
6291 }
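
/* Illustrative sketch (not driver code): this entry point services host-side
 * timeline signals, e.g.:
 *
 *    VkSemaphoreSignalInfo signal_info = {
 *       .sType = VK_STRUCTURE_TYPE_SEMAPHORE_SIGNAL_INFO,
 *       .semaphore = semaphore,
 *       .value = 42,
 *    };
 *    vkSignalSemaphore(device, &signal_info);
 *
 * For the emulated timeline this may unblock queued submissions, which is
 * why radv_process_submissions() runs before the waiters are woken.
 */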
6292 
6293 static void
6294 radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
6295                    struct radv_event *event)
6296 {
6297    if (event->bo)
6298       device->ws->buffer_destroy(device->ws, event->bo);
6299 
6300    vk_object_base_finish(&event->base);
6301    vk_free2(&device->vk.alloc, pAllocator, event);
6302 }
6303 
6304 VkResult
6305 radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
6306                  const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
6307 {
6308    RADV_FROM_HANDLE(radv_device, device, _device);
6309    struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
6310                                         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6311 
6312    if (!event)
6313       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6314 
6315    vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);
6316 
6317    VkResult result = device->ws->buffer_create(
6318       device->ws, 8, 8, RADEON_DOMAIN_GTT,
6319       RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
6320       RADV_BO_PRIORITY_FENCE, 0, &event->bo);
6321    if (result != VK_SUCCESS) {
6322       radv_destroy_event(device, pAllocator, event);
6323       return vk_error(device, result);
6324    }
6325 
6326    event->map = (uint64_t *)device->ws->buffer_map(event->bo);
6327    if (!event->map) {
6328       radv_destroy_event(device, pAllocator, event);
6329       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
6330    }
6331 
6332    *pEvent = radv_event_to_handle(event);
6333 
6334    return VK_SUCCESS;
6335 }
6336 
6337 void
6338 radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
6339 {
6340    RADV_FROM_HANDLE(radv_device, device, _device);
6341    RADV_FROM_HANDLE(radv_event, event, _event);
6342 
6343    if (!event)
6344       return;
6345 
6346    radv_destroy_event(device, pAllocator, event);
6347 }
6348 
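/* Event state lives in the 8-byte GTT buffer created in radv_CreateEvent:
 * the mapped word is 1 when set and 0 when reset, so the status query and
 * the host-side set/reset below are plain CPU loads and stores.
 */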
6349 VkResult
6350 radv_GetEventStatus(VkDevice _device, VkEvent _event)
6351 {
6352    RADV_FROM_HANDLE(radv_device, device, _device);
6353    RADV_FROM_HANDLE(radv_event, event, _event);
6354 
6355    if (radv_device_is_lost(device))
6356       return VK_ERROR_DEVICE_LOST;
6357 
6358    if (*event->map == 1)
6359       return VK_EVENT_SET;
6360    return VK_EVENT_RESET;
6361 }
6362 
6363 VkResult
6364 radv_SetEvent(VkDevice _device, VkEvent _event)
6365 {
6366    RADV_FROM_HANDLE(radv_event, event, _event);
6367    *event->map = 1;
6368 
6369    return VK_SUCCESS;
6370 }
6371 
6372 VkResult
6373 radv_ResetEvent(VkDevice _device, VkEvent _event)
6374 {
6375    RADV_FROM_HANDLE(radv_event, event, _event);
6376    *event->map = 0;
6377 
6378    return VK_SUCCESS;
6379 }
6380 
6381 void
6382 radv_buffer_init(struct radv_buffer *buffer, struct radv_device *device,
6383                  struct radeon_winsys_bo *bo, uint64_t size,
6384                  uint64_t offset)
6385 {
6386    vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);
6387 
6388    buffer->usage = 0;
6389    buffer->flags = 0;
6390    buffer->bo = bo;
6391    buffer->size = size;
6392    buffer->offset = offset;
6393 }
6394 
6395 void
6396 radv_buffer_finish(struct radv_buffer *buffer)
6397 {
6398    vk_object_base_finish(&buffer->base);
6399 }
6400 
6401 static void
6402 radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
6403                     struct radv_buffer *buffer)
6404 {
6405    if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
6406       device->ws->buffer_destroy(device->ws, buffer->bo);
6407 
6408    radv_buffer_finish(buffer);
6409    vk_free2(&device->vk.alloc, pAllocator, buffer);
6410 }
6411 
6412 VkResult
6413 radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
6414                   const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
6415 {
6416    RADV_FROM_HANDLE(radv_device, device, _device);
6417    struct radv_buffer *buffer;
6418 
6419    if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
6420       return VK_ERROR_OUT_OF_DEVICE_MEMORY;
6421 
6422    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
6423 
6424    buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
6425                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6426    if (buffer == NULL)
6427       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6428 
6429    radv_buffer_init(buffer, device, NULL, pCreateInfo->size, 0);
6430 
6431    buffer->usage = pCreateInfo->usage;
6432    buffer->flags = pCreateInfo->flags;
6433 
6434    buffer->shareable =
6435       vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;
6436 
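   /* Sparse buffers only reserve a virtual address range here; physical
    * pages are bound later through vkQueueBindSparse, so no backing memory
    * is allocated at create time.
    */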
6437    if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
6438       enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
6439       if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
6440          flags |= RADEON_FLAG_REPLAYABLE;
6441 
6442       uint64_t replay_address = 0;
6443       const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
6444          vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
6445       if (replay_info && replay_info->opaqueCaptureAddress)
6446          replay_address = replay_info->opaqueCaptureAddress;
6447 
6448       VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
6449                                                   flags, RADV_BO_PRIORITY_VIRTUAL,
6450                                                   replay_address, &buffer->bo);
6451       if (result != VK_SUCCESS) {
6452          radv_destroy_buffer(device, pAllocator, buffer);
6453          return vk_error(device, result);
6454       }
6455    }
6456 
6457    *pBuffer = radv_buffer_to_handle(buffer);
6458 
6459    return VK_SUCCESS;
6460 }
6461 
6462 void
6463 radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
6464 {
6465    RADV_FROM_HANDLE(radv_device, device, _device);
6466    RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
6467 
6468    if (!buffer)
6469       return;
6470 
6471    radv_destroy_buffer(device, pAllocator, buffer);
6472 }
6473 
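/* VK_KHR_buffer_device_address: the address handed to shaders is simply the
 * backing BO's GPU virtual address plus the buffer's offset within it.
 */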
6474 VkDeviceAddress
6475 radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
6476 {
6477    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
6478    return radv_buffer_get_va(buffer->bo) + buffer->offset;
6479 }
6480 
6481 uint64_t
6482 radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
6483 {
6484    RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
6485    return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
6486 }
6487 
6488 uint64_t
6489 radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
6490                                          const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
6491 {
6492    RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
6493    return radv_buffer_get_va(mem->bo);
6494 }
6495 
6496 static inline unsigned
6497 si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
6498 {
6499    if (stencil)
6500       return plane->surface.u.legacy.zs.stencil_tiling_index[level];
6501    else
6502       return plane->surface.u.legacy.tiling_index[level];
6503 }
6504 
6505 static uint32_t
6506 radv_surface_max_layer_count(struct radv_image_view *iview)
6507 {
6508    return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
6509                                                : (iview->base_layer + iview->layer_count);
6510 }
6511 
6512 static unsigned
6513 get_dcc_max_uncompressed_block_size(const struct radv_device *device,
6514                                     const struct radv_image_view *iview)
6515 {
6516    if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
6517       if (iview->image->planes[0].surface.bpe == 1)
6518          return V_028C78_MAX_BLOCK_SIZE_64B;
6519       else if (iview->image->planes[0].surface.bpe == 2)
6520          return V_028C78_MAX_BLOCK_SIZE_128B;
6521    }
6522 
6523    return V_028C78_MAX_BLOCK_SIZE_256B;
6524 }
6525 
6526 static unsigned
6527 get_dcc_min_compressed_block_size(const struct radv_device *device)
6528 {
6529    if (!device->physical_device->rad_info.has_dedicated_vram) {
6530       /* amdvlk: [min-compressed-block-size] should be set to 32 for
6531        * dGPU and 64 for APU because all of our APUs to date use
6532        * DIMMs which have a request granularity size of 64B while all
6533        * other chips have a 32B request size.
6534        */
6535       return V_028C78_MIN_BLOCK_SIZE_64B;
6536    }
6537 
6538    return V_028C78_MIN_BLOCK_SIZE_32B;
6539 }
6540 
6541 static uint32_t
6542 radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
6543 {
6544    unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
6545    unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
6546    unsigned max_compressed_block_size;
6547    unsigned independent_128b_blocks;
6548    unsigned independent_64b_blocks;
6549 
6550    if (!radv_dcc_enabled(iview->image, iview->base_mip))
6551       return 0;
6552 
6553    /* For GFX9+ ac_surface computes values for us (except min_compressed
6554     * and max_uncompressed) */
6555    if (device->physical_device->rad_info.chip_class >= GFX9) {
6556       max_compressed_block_size =
6557          iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
6558       independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
6559       independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
6560    } else {
6561       independent_128b_blocks = 0;
6562 
6563       if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
6564                                  VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
6565          /* If this DCC image is potentially going to be used in texture
6566           * fetches, we need some special settings.
6567           */
6568          independent_64b_blocks = 1;
6569          max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
6570       } else {
6571          /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
6572           * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
6573           * big as possible for better compression state.
6574           */
6575          independent_64b_blocks = 0;
6576          max_compressed_block_size = max_uncompressed_block_size;
6577       }
6578    }
6579 
6580    return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
6581           S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
6582           S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
6583           S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
6584           S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
6585 }
6586 
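/* Translate a color image view into CB_COLOR_* register values: surface base
 * address and tiling, CMASK/FMASK/DCC metadata addresses, and the format,
 * swap and number-type fields derived from the Vulkan format.
 */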
6587 void
6588 radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
6589                               struct radv_image_view *iview)
6590 {
6591    const struct util_format_description *desc;
6592    unsigned ntype, format, swap, endian;
6593    unsigned blend_clamp = 0, blend_bypass = 0;
6594    uint64_t va;
6595    const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
6596    const struct radeon_surf *surf = &plane->surface;
6597 
6598    desc = vk_format_description(iview->vk_format);
6599 
6600    memset(cb, 0, sizeof(*cb));
6601 
6602    /* Intensity is implemented as Red, so treat it that way. */
6603    cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);
6604 
6605    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6606 
6607    cb->cb_color_base = va >> 8;
6608 
6609    if (device->physical_device->rad_info.chip_class >= GFX9) {
6610       if (device->physical_device->rad_info.chip_class >= GFX10) {
6611          cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
6612                                  S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
6613                                  S_028EE0_CMASK_PIPE_ALIGNED(1) |
6614                                  S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
6615       } else {
6616          struct gfx9_surf_meta_flags meta = {
6617             .rb_aligned = 1,
6618             .pipe_aligned = 1,
6619          };
6620 
6621          if (surf->meta_offset)
6622             meta = surf->u.gfx9.color.dcc;
6623 
6624          cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
6625                                 S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
6626                                 S_028C74_RB_ALIGNED(meta.rb_aligned) |
6627                                 S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
6628          cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
6629       }
6630 
6631       cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
6632       cb->cb_color_base |= surf->tile_swizzle;
6633    } else {
6634       const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
6635       unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
6636 
6637       cb->cb_color_base += level_info->offset_256B;
6638       if (level_info->mode == RADEON_SURF_MODE_2D)
6639          cb->cb_color_base |= surf->tile_swizzle;
6640 
6641       pitch_tile_max = level_info->nblk_x / 8 - 1;
6642       slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
6643       tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);
6644 
6645       cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
6646       cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
6647       cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;
6648 
6649       cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);
6650 
6651       if (radv_image_has_fmask(iview->image)) {
6652          if (device->physical_device->rad_info.chip_class >= GFX7)
6653             cb->cb_color_pitch |=
6654                S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
6655          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
6656          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
6657       } else {
6658          /* This must be set for fast clear to work without FMASK. */
6659          if (device->physical_device->rad_info.chip_class >= GFX7)
6660             cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
6661          cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
6662          cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
6663       }
6664    }
6665 
6666    /* CMASK variables */
6667    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6668    va += surf->cmask_offset;
6669    cb->cb_color_cmask = va >> 8;
6670 
6671    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6672    va += surf->meta_offset;
6673 
6674    if (radv_dcc_enabled(iview->image, iview->base_mip) &&
6675        device->physical_device->rad_info.chip_class <= GFX8)
6676       va += plane->surface.u.legacy.color.dcc_level[iview->base_mip].dcc_offset;
6677 
6678    unsigned dcc_tile_swizzle = surf->tile_swizzle;
6679    dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;
6680 
6681    cb->cb_dcc_base = va >> 8;
6682    cb->cb_dcc_base |= dcc_tile_swizzle;
6683 
6684    /* GFX10 field has the same base shift as the GFX6 field. */
6685    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6686    cb->cb_color_view =
6687       S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);
6688 
6689    if (iview->image->info.samples > 1) {
6690       unsigned log_samples = util_logbase2(iview->image->info.samples);
6691 
6692       cb->cb_color_attrib |=
6693          S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);
6694    }
6695 
6696    if (radv_image_has_fmask(iview->image)) {
6697       va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;
6698       cb->cb_color_fmask = va >> 8;
6699       cb->cb_color_fmask |= surf->fmask_tile_swizzle;
6700    } else {
6701       cb->cb_color_fmask = cb->cb_color_base;
6702    }
6703 
6704    ntype = radv_translate_color_numformat(iview->vk_format, desc,
6705                                           vk_format_get_first_non_void_channel(iview->vk_format));
6706    format = radv_translate_colorformat(iview->vk_format);
6707    assert(format != V_028C70_COLOR_INVALID);
6708 
6709    swap = radv_translate_colorswap(iview->vk_format, false);
6710    endian = radv_colorformat_endian_swap(format);
6711 
6712    /* blend clamp should be set for all NORM/SRGB types */
6713    if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
6714        ntype == V_028C70_NUMBER_SRGB)
6715       blend_clamp = 1;
6716 
6717    /* set blend bypass according to docs if SINT/UINT or
6718       8/24 COLOR variants */
6719    if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
6720        format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
6721        format == V_028C70_COLOR_X24_8_32_FLOAT) {
6722       blend_clamp = 0;
6723       blend_bypass = 1;
6724    }
6725 #if 0
6726 	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
6727 	    (format == V_028C70_COLOR_8 ||
6728 	     format == V_028C70_COLOR_8_8 ||
6729 	     format == V_028C70_COLOR_8_8_8_8))
6730 		->color_is_int8 = true;
6731 #endif
6732    cb->cb_color_info =
6733       S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
6734       S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
6735       S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
6736                           ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
6737                           format != V_028C70_COLOR_24_8) |
6738       S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
6739    if (radv_image_has_fmask(iview->image)) {
6740       cb->cb_color_info |= S_028C70_COMPRESSION(1);
6741       if (device->physical_device->rad_info.chip_class == GFX6) {
6742          unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
6743          cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
6744       }
6745 
6746       if (radv_image_is_tc_compat_cmask(iview->image)) {
6747          /* Allow the texture block to read FMASK directly
6748           * without decompressing it. This bit must be cleared
6749           * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
6750           * otherwise the operation doesn't happen.
6751           */
6752          cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);
6753 
6754          if (device->physical_device->rad_info.chip_class == GFX8) {
6755             /* Set CMASK into a tiling format that allows
6756              * the texture block to read it.
6757              */
6758             cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
6759          }
6760       }
6761    }
6762 
6763    if (radv_image_has_cmask(iview->image) &&
6764        !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
6765       cb->cb_color_info |= S_028C70_FAST_CLEAR(1);
6766 
6767    if (radv_dcc_enabled(iview->image, iview->base_mip))
6768       cb->cb_color_info |= S_028C70_DCC_ENABLE(1);
6769 
6770    cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);
6771 
6772    /* This must be set for fast clear to work without FMASK. */
6773    if (!radv_image_has_fmask(iview->image) &&
6774        device->physical_device->rad_info.chip_class == GFX6) {
6775       unsigned bankh = util_logbase2(surf->u.legacy.bankh);
6776       cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
6777    }
6778 
6779    if (device->physical_device->rad_info.chip_class >= GFX9) {
6780       unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
6781                                ? (iview->extent.depth - 1)
6782                                : (iview->image->info.array_size - 1);
6783       unsigned width =
6784          vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);
6785       unsigned height =
6786          vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);
6787 
6788       if (device->physical_device->rad_info.chip_class >= GFX10) {
6789          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);
6790 
6791          cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
6792                                  S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
6793                                  S_028EE0_RESOURCE_LEVEL(1);
6794       } else {
6795          cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
6796          cb->cb_color_attrib |=
6797             S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
6798       }
6799 
6800       cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
6801                              S_028C68_MAX_MIP(iview->image->info.levels - 1);
6802    }
6803 }
6804 
6805 static unsigned
6806 radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
6807 {
6808    unsigned max_zplanes = 0;
6809 
6810    assert(radv_image_is_tc_compat_htile(iview->image));
6811 
6812    if (device->physical_device->rad_info.chip_class >= GFX9) {
6813       /* Default value for 32-bit depth surfaces. */
6814       max_zplanes = 4;
6815 
6816       if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
6817          max_zplanes = 2;
6818 
6819       /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
6820       if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
6821           radv_image_get_iterate256(device, iview->image) &&
6822           !radv_image_tile_stencil_disabled(device, iview->image) &&
6823           iview->image->info.samples == 4) {
6824          max_zplanes = 1;
6825       }
6826 
6827       max_zplanes = max_zplanes + 1;
6828    } else {
6829       if (iview->vk_format == VK_FORMAT_D16_UNORM) {
6830          /* Do not enable Z plane compression for 16-bit depth
6831           * surfaces because it isn't supported on GFX8. Only
6832           * 32-bit depth surfaces are supported by the hardware.
6833           * This allows us to maintain shader compatibility and to
6834           * reduce the number of depth decompressions.
6835           */
6836          max_zplanes = 1;
6837       } else {
6838          if (iview->image->info.samples <= 1)
6839             max_zplanes = 5;
6840          else if (iview->image->info.samples <= 4)
6841             max_zplanes = 3;
6842          else
6843             max_zplanes = 2;
6844       }
6845    }
6846 
6847    return max_zplanes;
6848 }
6849 
6850 void
6851 radv_initialise_vrs_surface(struct radv_image *image, struct radv_buffer *htile_buffer,
6852                             struct radv_ds_buffer_info *ds)
6853 {
6854    const struct radeon_surf *surf = &image->planes[0].surface;
6855 
6856    assert(image->vk_format == VK_FORMAT_D16_UNORM);
6857    memset(ds, 0, sizeof(*ds));
6858 
6859    ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6860 
6861    ds->db_z_info = S_028038_FORMAT(V_028040_Z_16) |
6862                    S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6863                    S_028038_ZRANGE_PRECISION(1) |
6864                    S_028038_TILE_SURFACE_ENABLE(1);
6865    ds->db_stencil_info = S_02803C_FORMAT(V_028044_STENCIL_INVALID);
6866 
6867    ds->db_depth_size = S_02801C_X_MAX(image->info.width - 1) |
6868                        S_02801C_Y_MAX(image->info.height - 1);
6869 
6870    ds->db_htile_data_base = radv_buffer_get_va(htile_buffer->bo) >> 8;
6871    ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1) |
6872                           S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6873 }
6874 
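/* Translate a depth/stencil image view into DB_* register values, covering
 * both the GFX9+ path (single swizzle mode, optional TC-compatible HTILE)
 * and the GFX6-8 legacy tiling path.
 */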
6875 void
6876 radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
6877                            struct radv_image_view *iview)
6878 {
6879    unsigned level = iview->base_mip;
6880    unsigned format, stencil_format;
6881    uint64_t va, s_offs, z_offs;
6882    bool stencil_only = iview->image->vk_format == VK_FORMAT_S8_UINT;
6883    const struct radv_image_plane *plane = &iview->image->planes[0];
6884    const struct radeon_surf *surf = &plane->surface;
6885 
6886    assert(vk_format_get_plane_count(iview->image->vk_format) == 1);
6887 
6888    memset(ds, 0, sizeof(*ds));
6889    if (!device->instance->absolute_depth_bias) {
6890       switch (iview->image->vk_format) {
6891       case VK_FORMAT_D24_UNORM_S8_UINT:
6892       case VK_FORMAT_X8_D24_UNORM_PACK32:
6893          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
6894          break;
6895       case VK_FORMAT_D16_UNORM:
6896       case VK_FORMAT_D16_UNORM_S8_UINT:
6897          ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
6898          break;
6899       case VK_FORMAT_D32_SFLOAT:
6900       case VK_FORMAT_D32_SFLOAT_S8_UINT:
6901          ds->pa_su_poly_offset_db_fmt_cntl =
6902             S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
6903          break;
6904       default:
6905          break;
6906       }
6907    }
6908 
6909    format = radv_translate_dbformat(iview->image->vk_format);
6910    stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;
6911 
6912    uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
6913    ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
6914    if (device->physical_device->rad_info.chip_class >= GFX10) {
6915       ds->db_depth_view |=
6916          S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
6917    }
6918 
6919    ds->db_htile_data_base = 0;
6920    ds->db_htile_surface = 0;
6921 
6922    va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
6923    s_offs = z_offs = va;
6924 
6925    if (device->physical_device->rad_info.chip_class >= GFX9) {
6926       assert(surf->u.gfx9.surf_offset == 0);
6927       s_offs += surf->u.gfx9.zs.stencil_offset;
6928 
6929       ds->db_z_info = S_028038_FORMAT(format) |
6930                       S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
6931                       S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
6932                       S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
6933       ds->db_stencil_info =
6934          S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);
6935 
6936       if (device->physical_device->rad_info.chip_class == GFX9) {
6937          ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
6938          ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
6939       }
6940 
6941       ds->db_depth_view |= S_028008_MIPID(level);
6942       ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
6943                           S_02801C_Y_MAX(iview->image->info.height - 1);
6944 
6945       if (radv_htile_enabled(iview->image, level)) {
6946          ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);
6947 
6948          if (radv_image_is_tc_compat_htile(iview->image)) {
6949             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
6950 
6951             ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
6952 
6953             if (device->physical_device->rad_info.chip_class >= GFX10) {
6954                bool iterate256 = radv_image_get_iterate256(device, iview->image);
6955 
6956                ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
6957                ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
6958                ds->db_z_info |= S_028040_ITERATE_256(iterate256);
6959                ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
6960             } else {
6961                ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
6962                ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
6963             }
6964          }
6965 
6966          if (radv_image_tile_stencil_disabled(device, iview->image)) {
6967             ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
6968          }
6969 
6970          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
6971          ds->db_htile_data_base = va >> 8;
6972          ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);
6973 
6974          if (device->physical_device->rad_info.chip_class == GFX9) {
6975             ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
6976          }
6977 
6978          if (radv_image_has_vrs_htile(device, iview->image)) {
6979             ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
6980          }
6981       }
6982    } else {
6983       const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];
6984 
6985       if (stencil_only)
6986          level_info = &surf->u.legacy.zs.stencil_level[level];
6987 
6988       z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
6989       s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;
6990 
6991       ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
6992       ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
6993       ds->db_stencil_info = S_028044_FORMAT(stencil_format);
6994 
6995       if (iview->image->info.samples > 1)
6996          ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));
6997 
6998       if (device->physical_device->rad_info.chip_class >= GFX7) {
6999          struct radeon_info *info = &device->physical_device->rad_info;
7000          unsigned tiling_index = surf->u.legacy.tiling_index[level];
7001          unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
7002          unsigned macro_index = surf->u.legacy.macro_tile_index;
7003          unsigned tile_mode = info->si_tile_mode_array[tiling_index];
7004          unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
7005          unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];
7006 
7007          if (stencil_only)
7008             tile_mode = stencil_tile_mode;
7009 
7010          ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
7011                               S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
7012                               S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
7013                               S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
7014                               S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
7015                               S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
7016          ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
7017          ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
7018       } else {
7019          unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
7020          ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
7021          tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
7022          ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
7023          if (stencil_only)
7024             ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
7025       }
7026 
7027       ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
7028                           S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
7029       ds->db_depth_slice =
7030          S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
7031 
7032       if (radv_htile_enabled(iview->image, level)) {
7033          ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);
7034 
7035          if (radv_image_tile_stencil_disabled(device, iview->image)) {
7036             ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
7037          }
7038 
7039          va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
7040          ds->db_htile_data_base = va >> 8;
7041          ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
7042 
7043          if (radv_image_is_tc_compat_htile(iview->image)) {
7044             unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);
7045 
7046             ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
7047             ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
7048          }
7049       }
7050    }
7051 
7052    ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
7053    ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
7054 }
7055 
7056 VkResult
7057 radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,
7058                        const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)
7059 {
7060    RADV_FROM_HANDLE(radv_device, device, _device);
7061    struct radv_framebuffer *framebuffer;
7062    const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
7063       vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
7064 
7065    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
7066 
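   /* With imageless framebuffers there are no attachments to record, so the
    * trailing image-view array is only needed for the classic path.
    */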
7067    size_t size = sizeof(*framebuffer);
7068    if (!imageless_create_info)
7069       size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;
7070    framebuffer =
7071       vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
7072    if (framebuffer == NULL)
7073       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
7074 
7075    vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);
7076 
7077    framebuffer->attachment_count = pCreateInfo->attachmentCount;
7078    framebuffer->width = pCreateInfo->width;
7079    framebuffer->height = pCreateInfo->height;
7080    framebuffer->layers = pCreateInfo->layers;
7081    framebuffer->imageless = !!imageless_create_info;
7082 
7083    if (!imageless_create_info) {
7084       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
7085          VkImageView _iview = pCreateInfo->pAttachments[i];
7086          struct radv_image_view *iview = radv_image_view_from_handle(_iview);
7087          framebuffer->attachments[i] = iview;
7088       }
7089    }
7090 
7091    *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
7092    return VK_SUCCESS;
7093 }
7094 
7095 void
7096 radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,
7097                         const VkAllocationCallbacks *pAllocator)
7098 {
7099    RADV_FROM_HANDLE(radv_device, device, _device);
7100    RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);
7101 
7102    if (!fb)
7103       return;
7104    vk_object_base_finish(&fb->base);
7105    vk_free2(&device->vk.alloc, pAllocator, fb);
7106 }
7107 
7108 static unsigned
7109 radv_tex_wrap(VkSamplerAddressMode address_mode)
7110 {
7111    switch (address_mode) {
7112    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
7113       return V_008F30_SQ_TEX_WRAP;
7114    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
7115       return V_008F30_SQ_TEX_MIRROR;
7116    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
7117       return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
7118    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
7119       return V_008F30_SQ_TEX_CLAMP_BORDER;
7120    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
7121       return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
7122    default:
7123       unreachable("illegal tex wrap mode");
7124       break;
7125    }
7126 }
7127 
7128 static unsigned
7129 radv_tex_compare(VkCompareOp op)
7130 {
7131    switch (op) {
7132    case VK_COMPARE_OP_NEVER:
7133       return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7134    case VK_COMPARE_OP_LESS:
7135       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
7136    case VK_COMPARE_OP_EQUAL:
7137       return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
7138    case VK_COMPARE_OP_LESS_OR_EQUAL:
7139       return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
7140    case VK_COMPARE_OP_GREATER:
7141       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
7142    case VK_COMPARE_OP_NOT_EQUAL:
7143       return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
7144    case VK_COMPARE_OP_GREATER_OR_EQUAL:
7145       return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
7146    case VK_COMPARE_OP_ALWAYS:
7147       return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
7148    default:
7149       unreachable("illegal compare mode");
7150       break;
7151    }
7152 }
7153 
7154 static unsigned
7155 radv_tex_filter(VkFilter filter, unsigned max_aniso)
7156 {
7157    switch (filter) {
7158    case VK_FILTER_NEAREST:
7159       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
7160                             : V_008F38_SQ_TEX_XY_FILTER_POINT);
7161    case VK_FILTER_LINEAR:
7162       return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
7163                             : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
7164    case VK_FILTER_CUBIC_IMG:
7165    default:
7166       fprintf(stderr, "illegal texture filter\n");
7167       return 0;
7168    }
7169 }
7170 
7171 static unsigned
7172 radv_tex_mipfilter(VkSamplerMipmapMode mode)
7173 {
7174    switch (mode) {
7175    case VK_SAMPLER_MIPMAP_MODE_NEAREST:
7176       return V_008F38_SQ_TEX_Z_FILTER_POINT;
7177    case VK_SAMPLER_MIPMAP_MODE_LINEAR:
7178       return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
7179    default:
7180       return V_008F38_SQ_TEX_Z_FILTER_NONE;
7181    }
7182 }
7183 
7184 static unsigned
7185 radv_tex_bordercolor(VkBorderColor bcolor)
7186 {
7187    switch (bcolor) {
7188    case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
7189    case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
7190       return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
7191    case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
7192    case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
7193       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
7194    case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
7195    case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
7196       return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
7197    case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
7198    case VK_BORDER_COLOR_INT_CUSTOM_EXT:
7199       return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
7200    default:
7201       break;
7202    }
7203    return 0;
7204 }
7205 
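/* Map a maxAnisotropy value to the 3-bit hardware ratio field, roughly log2
 * clamped at 16x: 1x -> 0, 2-3x -> 1, 4-7x -> 2, 8-15x -> 3, 16x+ -> 4.
 */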
7206 static unsigned
7207 radv_tex_aniso_filter(unsigned filter)
7208 {
7209    if (filter < 2)
7210       return 0;
7211    if (filter < 4)
7212       return 1;
7213    if (filter < 8)
7214       return 2;
7215    if (filter < 16)
7216       return 3;
7217    return 4;
7218 }
7219 
7220 static unsigned
7221 radv_tex_filter_mode(VkSamplerReductionMode mode)
7222 {
7223    switch (mode) {
7224    case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
7225       return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7226    case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
7227       return V_008F30_SQ_IMG_FILTER_MODE_MIN;
7228    case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
7229       return V_008F30_SQ_IMG_FILTER_MODE_MAX;
7230    default:
7231       break;
7232    }
7233    return 0;
7234 }
7235 
7236 static uint32_t
7237 radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
7238 {
7239    if (device->force_aniso >= 0)
7240       return device->force_aniso;
7241 
7242    if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
7243       return (uint32_t)pCreateInfo->maxAnisotropy;
7244 
7245    return 0;
7246 }
7247 
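/* Convert a float to signed fixed point with 'frac_bits' fractional bits;
 * e.g. S_FIXED(1.5f, 8) == 384.
 */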
7248 static inline int
7249 S_FIXED(float value, unsigned frac_bits)
7250 {
7251    return value * (1 << frac_bits);
7252 }
7253 
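/* Linearly scan the custom border color table for a free slot and upload the
 * color there. Returns RADV_BORDER_COLOR_COUNT when the table is full, in
 * which case the caller falls back to transparent black.
 */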
7254 static uint32_t
7255 radv_register_border_color(struct radv_device *device, VkClearColorValue value)
7256 {
7257    uint32_t slot;
7258 
7259    mtx_lock(&device->border_color_data.mutex);
7260 
7261    for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
7262       if (!device->border_color_data.used[slot]) {
6263          /* Copy to the GPU, respecting endianness. */
7264          util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
7265                                  sizeof(VkClearColorValue));
7266 
7267          device->border_color_data.used[slot] = true;
7268          break;
7269       }
7270    }
7271 
7272    mtx_unlock(&device->border_color_data.mutex);
7273 
7274    return slot;
7275 }
7276 
7277 static void
7278 radv_unregister_border_color(struct radv_device *device, uint32_t slot)
7279 {
7280    mtx_lock(&device->border_color_data.mutex);
7281 
7282    device->border_color_data.used[slot] = false;
7283 
7284    mtx_unlock(&device->border_color_data.mutex);
7285 }
7286 
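/* Pack a VkSamplerCreateInfo into the four SQ_IMG_SAMP state dwords:
 * wrap/compare/aniso controls in word 0, the LOD range in word 1, filters
 * and LOD bias in word 2, and the border color in word 3, with
 * per-generation quirks (COMPAT_MODE on GFX8/GFX9, ANISO_OVERRIDE) applied
 * at the end.
 */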
7287 static void
7288 radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
7289                   const VkSamplerCreateInfo *pCreateInfo)
7290 {
7291    uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
7292    uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
7293    bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
7294                       device->physical_device->rad_info.chip_class == GFX9;
7295    unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
7296    unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
7297    bool trunc_coord =
7298       pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
7299    bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7300                             pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
7301                             pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
7302    VkBorderColor border_color =
7303       uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7304    uint32_t border_color_ptr;
7305 
7306    const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
7307       vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
7308    if (sampler_reduction)
7309       filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);
7310 
7311    if (pCreateInfo->compareEnable)
7312       depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);
7313 
7314    sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;
7315 
7316    if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
7317        border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
7318       const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
7319          vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
7320 
7321       assert(custom_border_color);
7322 
7323       sampler->border_color_slot =
7324          radv_register_border_color(device, custom_border_color->customBorderColor);
7325 
7326       /* Did we fail to find a slot? */
7327       if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
7328          fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
7329          border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
7330       }
7331    }
7332 
7333    /* If we don't have a custom color, set the ptr to 0 */
7334    border_color_ptr =
7335       sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;
7336 
7337    sampler->state[0] =
7338       (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
7339        S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
7340        S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
7341        S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
7342        S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
7343        S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
7344        S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |
7345        S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
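   /* The LOD fields below are fixed point with 8 fractional bits; S_FIXED(x, 8)
    * just scales by 256, so e.g. a maxLod of 2.5 packs as 0x280 and a
    * mipLodBias of -1.0 as -256 truncated to the width of the LOD_BIAS field. */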
   sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
                        S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
                        S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
                        S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
                        S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                        S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
                        S_008F38_MIP_POINT_PRECLAMP(0));
   sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
                        S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
   } else {
      sampler->state[2] |=
         S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
         S_008F38_FILTER_PREC_FIX(1) |
         S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
   }
}

VkResult
radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_sampler *sampler;

   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);

   radv_init_sampler(device, sampler, pCreateInfo);

   sampler->ycbcr_sampler =
      ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
                       : NULL;
   *pSampler = radv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

void
radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
      radv_unregister_border_color(device, sampler->border_color_slot);

   vk_object_base_finish(&sampler->base);
   vk_free2(&device->vk.alloc, pAllocator, sampler);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it is
    *         linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
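   /* Worked example: a loader supporting up to v5 calls this with
    * *pSupportedVersion == 5 and we clamp it to 4, so both sides speak v4;
    * an old loader passing 1 keeps 1 and we must behave as a v1 ICD. */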
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

VkResult
radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   bool ret = radv_get_memory_fd(device, memory, pFD);
   if (!ret)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   return VK_SUCCESS;
}

static uint32_t
radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
                                        enum radeon_bo_domain domains, enum radeon_bo_flag flags,
                                        enum radeon_bo_flag ignore_flags)
{
   /* Don't count GTT/CPU as relevant:
    *
    * - We're not fully consistent between the two.
    * - Sometimes VRAM gets VRAM|GTT.
    */
   const enum radeon_bo_domain relevant_domains =
      RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
   uint32_t bits = 0;
   for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
      if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
         continue;

      if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
         continue;

      bits |= 1u << i;
   }

   return bits;
}

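/* Match the BO's domains/flags against each advertised memory type,
 * progressively relaxing the flag comparison until something fits: require
 * only NO_CPU_ACCESS and GTT_WC to agree at first, then also ignore GTT_WC,
 * then NO_CPU_ACCESS as well, so an imported dma-buf whose flags don't
 * exactly match any type can still land in one that differs only in
 * write-combining or CPU visibility. */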
static uint32_t
radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
                                enum radeon_bo_flag flags)
{
   enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
   uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);

   if (!bits) {
      ignore_flags |= RADEON_FLAG_GTT_WC;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   if (!bits) {
      ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   return bits;
}

VkResult
radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
                              int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
      enum radeon_bo_domain domains;
      enum radeon_bo_flag flags;
      if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

      pMemoryFdProperties->memoryTypeBits =
         radv_compute_valid_memory_types(device->physical_device, domains, flags);
      return VK_SUCCESS;
   }
   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

static VkResult
radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   uint32_t syncobj_handle = 0;
   int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
   if (ret != 0)
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   if (*syncobj)
      device->ws->destroy_syncobj(device->ws, *syncobj);

   *syncobj = syncobj_handle;
   close(fd);

   return VK_SUCCESS;
}

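/* Import a sync file into a (possibly freshly created) binary syncobj.  Per
 * the external fence/semaphore fd specs, fd == -1 is a valid input meaning
 * "already signaled", which is why the syncobj is either created pre-signaled
 * or signaled in place instead of importing anything. */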
static VkResult
radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   /* If we need to create a syncobj, do it locally so that on error we don't
    * leave the caller's fence or semaphore pointing at a syncobj in an
    * undetermined state. */
   uint32_t syncobj_handle = *syncobj;
   if (!syncobj_handle) {
      bool create_signaled = (fd == -1);

      int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);
      if (ret) {
         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      if (fd == -1)
         device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
   }

   if (fd != -1) {
      int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
      if (ret)
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      close(fd);
   }

   *syncobj = syncobj_handle;

   return VK_SUCCESS;
}

VkResult
radv_ImportSemaphoreFdKHR(VkDevice _device,
                          const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
   VkResult result;
   struct radv_semaphore_part *dst = NULL;
   bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;

   if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
      assert(!timeline);
      dst = &sem->temporary;
   } else {
      dst = &sem->permanent;
   }

   uint32_t syncobj =
      (dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         ? dst->syncobj
         : 0;

   switch (pImportSemaphoreFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      assert(!timeline);
      result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_SEMAPHORE_SYNCOBJ;
      if (timeline) {
         dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
         dst->timeline_syncobj.max_point = 0;
      }
   }

   return result;
}

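/* Export a semaphore payload as an fd.  Exporting a SYNC_FD has copy
 * transference, and the spec requires it to have the same side effects on
 * the source payload as a semaphore wait, hence the destroy-temporary /
 * reset-syncobj logic in the SYNC_FD case below. */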
VkResult
radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
   int ret;
   uint32_t syncobj_handle;

   if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
      assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->temporary.syncobj;
   } else {
      assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->permanent.syncobj;
   }

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);

      if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
         radv_destroy_semaphore_part(device, &sem->temporary);
      } else {
         device->ws->reset_syncobj(device->ws, syncobj_handle);
      }
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   return VK_SUCCESS;
}

void
radv_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

   if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
       pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   } else if (pExternalSemaphoreInfo->handleType ==
                 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   }
}

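/* Fence import mirrors the semaphore path above: with
 * VK_FENCE_IMPORT_TEMPORARY_BIT the imported payload only lives in
 * fence->temporary, and the permanent state is restored when the fence is
 * next reset, as the spec requires. */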
VkResult
radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
   struct radv_fence_part *dst = NULL;
   VkResult result;

   if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
      dst = &fence->temporary;
   } else {
      dst = &fence->permanent;
   }

   uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;

   switch (pImportFenceFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_FENCE_SYNCOBJ;
   }

   return result;
}

VkResult
radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
   int ret;

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);

      if (part == &fence->temporary) {
         radv_destroy_fence_part(device, part);
      } else {
         device->ws->reset_syncobj(device->ws, part->syncobj);
      }
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   return VK_SUCCESS;
}

void
radv_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
   if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
       pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalFenceProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->compatibleHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->externalFenceFeatures =
         VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
      pExternalFenceProperties->compatibleHandleTypes = 0;
      pExternalFenceProperties->externalFenceFeatures = 0;
   }
}

void
radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
                                      uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
                                      VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
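   /* RADV only advertises single-device groups, so the only valid peer is
    * the device itself and all copy/generic directions are supported. */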
   assert(localDeviceIndex == remoteDeviceIndex);

   *pPeerMemoryFeatures =
      VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
      VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult
radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
                                                  uint32_t *pTimeDomainCount,
                                                  VkTimeDomainEXT *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
      {
         *i = radv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

#ifndef _WIN32
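/* Sample a CLOCK_* time in nanoseconds, falling back from
 * CLOCK_MONOTONIC_RAW to CLOCK_MONOTONIC if the kernel rejects the raw
 * clock, and returning 0 on failure. */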
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult
radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
                                const VkCalibratedTimestampInfoEXT *pTimestampInfos,
                                uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
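         /* clock_crystal_freq is in kHz, so 1000000 / freq is the GPU tick
          * period in ns, rounded up (e.g. a 100000 kHz crystal -> 10 ns). */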
         uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                               s                 e
    *                    w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *   Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                               g
    *             0         1         2         3
    *   GPU       -----_____-----_____-----_____-----_____
    *
    *                                                m
    *                                       x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *   Monotonic                           -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *   Interval                     <----------------->
    *   Deviation           <-------------------------->
    *
    *           s  = read(raw)       2
    *           g  = read(GPU)       1
    *           m  = read(monotonic) 2
    *           e  = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

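   /* Worked example with made-up numbers: if the two raw reads were 100 ns
    * apart and the slowest sampled clock ticks every 10 ns, we report a
    * deviation of (100 + 1) + 10 = 111 ns. */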
   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
#endif

void
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
                                               VkSampleCountFlagBits samples,
                                               VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                                             VK_SAMPLE_COUNT_8_BIT;

   if (samples & supported_samples) {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
   } else {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
   }
}

VkResult
radv_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
                          pFragmentShadingRateCount);

#define append_rate(w, h, s)                                                                       \
   {                                                                                               \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR,          \
         .sampleCounts = s,                                                                        \
         .fragmentSize = {.width = w, .height = h},                                                \
      };                                                                                           \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
   }

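   /* The spec requires rates ordered from the largest fragment size to the
    * smallest, which the descending loops guarantee: 2x2, 2x1, 1x2, 1x1.
    * The 1x1 rate must advertise support for all sample counts, hence
    * samples = ~0 there. */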
   for (uint32_t x = 2; x >= 1; x--) {
      for (uint32_t y = 2; y >= 1; y--) {
         VkSampleCountFlagBits samples;

         if (x == 1 && y == 1) {
            samples = ~0;
         } else {
            samples = VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT |
                      VK_SAMPLE_COUNT_4_BIT | VK_SAMPLE_COUNT_8_BIT;
         }

         append_rate(x, y, samples);
      }
   }
#undef append_rate

   return vk_outarray_status(&out);
}