/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include <fcntl.h>
#include <poll.h>
#include <sys/sysinfo.h>

#include "git_sha1.h"
#include "util/debug.h"
#include "util/disk_cache.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_query.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define TU_HAS_SURFACE 1
#else
#define TU_HAS_SURFACE 0
#endif
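
/* TU_HAS_SURFACE gates the WSI-dependent instance/device extensions below
 * and the tu_wsi_init() call in tu_physical_device_init(); headless builds
 * compile all of that out.
 */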

static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = device->instance->debug_flags & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

#define TU_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = TU_API_VERSION;
   return VK_SUCCESS;
}
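
/* A minimal application-side sketch (hypothetical caller, not part of this
 * driver): the loader dispatches vkEnumerateInstanceVersion() here, so
 *
 *    uint32_t version = 0;
 *    vkEnumerateInstanceVersion(&version);
 *    assert(VK_VERSION_MAJOR(version) == 1 && VK_VERSION_MINOR(version) == 2);
 *
 * would observe the 1.2 instance version advertised above.
 */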

static const struct vk_instance_extension_table tu_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .KHR_surface = TU_HAS_SURFACE,
   .KHR_get_surface_capabilities2 = TU_HAS_SURFACE,
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_imageless_framebuffer = true,
      .KHR_incremental_present = TU_HAS_SURFACE,
      .KHR_image_format_list = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_multiview = true,
      .KHR_performance_query = device->instance->debug_flags & TU_DEBUG_PERFC,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_swapchain = TU_HAS_SURFACE,
      .KHR_swapchain_mutable_format = TU_HAS_SURFACE,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_driver_properties = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_buffer_device_address = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_synchronization2 = true,
      .KHR_dynamic_rendering = true,
#ifndef TU_USE_KGSL
      .KHR_timeline_semaphore = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = true,
      .EXT_transform_feedback = true,
      .EXT_4444_formats = true,
      .EXT_border_color_swizzle = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_indexing = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_shader_module_identifier = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_provoking_vertex = true,
      .EXT_line_rasterization = true,
      .EXT_subgroup_size_control = true,
      .EXT_image_robustness = true,
      .EXT_primitives_generated_query = true,
      .EXT_image_view_min_lod = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_creation_cache_control = true,
#ifndef TU_USE_KGSL
      .EXT_physical_device_drm = true,
#endif
      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
#ifdef ANDROID
      .ANDROID_native_buffer = true,
#endif
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_color_write_enable = true,
      .EXT_load_store_op_none = true,
   };
}

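/* Pipeline-cache objects are re-imported through these ops; the list is
 * NULL-terminated.
 */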
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shaders_ops,
   NULL,
};

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info *info = fd_dev_info(&device->dev_id);
   if (!info) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
      device->info = info;
      device->ccu_offset_bypass = device->info->num_ccu * A6XX_CCU_DEPTH_SIZE;
      device->ccu_offset_gmem = (device->gmem_size -
                                 device->info->num_ccu * A6XX_CCU_GMEM_COLOR_SIZE);
      break;
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_device_extension_table supported_extensions;
   get_device_extensions(device, &supported_extensions);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_name;

   device->vk.supported_sync_types = device->sync_types;

#if TU_HAS_SURFACE
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_name;
   }
#endif

   /* The gpu id is already embedded in the cache uuid, so passing the device
    * name as the disk-cache key adds no further information.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->vk.disk_cache = disk_cache_create(device->name, buf, 0);

   device->vk.pipeline_cache_import_ops = cache_import_ops;

   return VK_SUCCESS;

fail_free_name:
   vk_free(&instance->vk.alloc, (void *)device->name);
   return result;
}

static void
tu_physical_device_finish(struct tu_physical_device *device)
{
#if TU_HAS_SURFACE
   tu_wsi_finish(device);
#endif

   close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);

   vk_free(&device->instance->vk.alloc, (void *)device->name);

   vk_physical_device_finish(&device->vk);
}

static const struct debug_control tu_debug_options[] = {
   { "startup", TU_DEBUG_STARTUP },
   { "nir", TU_DEBUG_NIR },
   { "nobin", TU_DEBUG_NOBIN },
   { "sysmem", TU_DEBUG_SYSMEM },
   { "gmem", TU_DEBUG_GMEM },
   { "forcebin", TU_DEBUG_FORCEBIN },
   { "layout", TU_DEBUG_LAYOUT },
   { "noubwc", TU_DEBUG_NOUBWC },
   { "nomultipos", TU_DEBUG_NOMULTIPOS },
   { "nolrz", TU_DEBUG_NOLRZ },
   { "nolrzfc", TU_DEBUG_NOLRZFC },
   { "perf", TU_DEBUG_PERF },
   { "perfc", TU_DEBUG_PERFC },
   { "flushall", TU_DEBUG_FLUSHALL },
   { "syncdraw", TU_DEBUG_SYNCDRAW },
   { "dontcare_as_load", TU_DEBUG_DONT_CARE_AS_LOAD },
   { "rast_order", TU_DEBUG_RAST_ORDER },
   { "unaligned_store", TU_DEBUG_UNALIGNED_STORE },
   { "log_skip_gmem_ops", TU_DEBUG_LOG_SKIP_GMEM_OPS },
   { "dynamic", TU_DEBUG_DYNAMIC },
   { NULL, 0 }
};
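
/* These flags are parsed from the TU_DEBUG environment variable in
 * tu_CreateInstance(), so e.g. (hypothetical invocation)
 *
 *    TU_DEBUG=startup,perfc ./my_vulkan_app
 *
 * enables startup logging and exposes KHR_performance_query.
 */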

const char *
tu_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(tu_debug_options) - 1);
   return tu_debug_options[id].string;
}

static const driOptionDescription tu_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(true)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
   DRI_CONF_SECTION_END
};

static void
tu_init_dri_options(struct tu_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
                      ARRAY_SIZE(tu_dri_options));
   driParseConfigFiles(&instance->dri_options,
                       &instance->available_dri_options, 0, "turnip",
                       NULL, NULL,
                       instance->vk.app_info.app_name,
                       instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name,
                       instance->vk.app_info.engine_version);

   if (driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load"))
      instance->debug_flags |= TU_DEBUG_DONT_CARE_AS_LOAD;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkInstance *pInstance)
{
   struct tu_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &tu_instance_extensions_supported,
                             &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->physical_device_count = -1;

   instance->debug_flags =
      parse_debug_string(os_get_option("TU_DEBUG"), tu_debug_options);

#ifdef DEBUG
   /* Enable startup debugging by default on debug drivers. You almost always
    * want to see your startup failures in that case, and it's hard to set
    * this env var on Android.
    */
   instance->debug_flags |= TU_DEBUG_STARTUP;
#endif

   if (instance->debug_flags & TU_DEBUG_STARTUP)
      mesa_logi("Created an instance");

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   tu_init_dri_options(instance);

   *pInstance = tu_instance_to_handle(instance);

#ifdef HAVE_PERFETTO
   tu_perfetto_init();
#endif

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   if (!instance)
      return;

   for (int i = 0; i < instance->physical_device_count; ++i) {
      tu_physical_device_finish(instance->physical_devices + i);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDevices(VkInstance _instance,
                            uint32_t *pPhysicalDeviceCount,
                            VkPhysicalDevice *pPhysicalDevices)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out,
                          pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result;

   if (instance->physical_device_count < 0) {
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDevice, &out, p)
      {
         *p = tu_physical_device_to_handle(instance->physical_devices + i);
      }
   }

   return vk_outarray_status(&out);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumeratePhysicalDeviceGroups(
   VkInstance _instance,
   uint32_t *pPhysicalDeviceGroupCount,
   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out,
                          pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);
   VkResult result;

   if (instance->physical_device_count < 0) {
      result = tu_enumerate_devices(instance);
      if (result != VK_SUCCESS && result != VK_ERROR_INCOMPATIBLE_DRIVER)
         return result;
   }

   for (uint32_t i = 0; i < instance->physical_device_count; ++i) {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         p->physicalDevices[0] =
            tu_physical_device_to_handle(instance->physical_devices + i);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

static void
tu_get_physical_device_features_1_1(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan11Features *features)
{
   features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess = false;
   features->storagePushConstant16 = false;
   features->storageInputOutput16 = false;
   features->multiview = true;
   features->multiviewGeometryShader = false;
   features->multiviewTessellationShader = false;
   features->variablePointersStorageBuffer = true;
   features->variablePointers = true;
   features->protectedMemory = false;
   features->samplerYcbcrConversion = true;
   features->shaderDrawParameters = true;
}

static void
tu_get_physical_device_features_1_2(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan12Features *features)
{
   features->samplerMirrorClampToEdge = true;
   features->drawIndirectCount = true;
   features->storageBuffer8BitAccess = false;
   features->uniformAndStorageBuffer8BitAccess = false;
   features->storagePushConstant8 = false;
   features->shaderBufferInt64Atomics = false;
   features->shaderSharedInt64Atomics = false;
   features->shaderFloat16 = true;
   features->shaderInt8 = false;

   features->descriptorIndexing = true;
   features->shaderInputAttachmentArrayDynamicIndexing = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing = true;
   features->shaderUniformBufferArrayNonUniformIndexing = true;
   features->shaderSampledImageArrayNonUniformIndexing = true;
   features->shaderStorageBufferArrayNonUniformIndexing = true;
   features->shaderStorageImageArrayNonUniformIndexing = true;
   features->shaderInputAttachmentArrayNonUniformIndexing = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   features->descriptorBindingUniformBufferUpdateAfterBind = true;
   features->descriptorBindingSampledImageUpdateAfterBind = true;
   features->descriptorBindingStorageImageUpdateAfterBind = true;
   features->descriptorBindingStorageBufferUpdateAfterBind = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending = true;
   features->descriptorBindingPartiallyBound = true;
   features->descriptorBindingVariableDescriptorCount = true;
   features->runtimeDescriptorArray = true;

   features->samplerFilterMinmax = true;
   features->scalarBlockLayout = true;
   features->imagelessFramebuffer = true;
   features->uniformBufferStandardLayout = true;
   features->shaderSubgroupExtendedTypes = true;
   features->separateDepthStencilLayouts = true;
   features->hostQueryReset = true;
   features->timelineSemaphore = true;
   features->bufferDeviceAddress = true;
   features->bufferDeviceAddressCaptureReplay = false;
   features->bufferDeviceAddressMultiDevice = false;
   features->vulkanMemoryModel = true;
   features->vulkanMemoryModelDeviceScope = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex = true;
   features->shaderOutputLayer = true;
   features->subgroupBroadcastDynamicId = true;
}

static void
tu_get_physical_device_features_1_3(struct tu_physical_device *pdevice,
                                    VkPhysicalDeviceVulkan13Features *features)
{
   features->robustImageAccess = true;
   features->inlineUniformBlock = false;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = false;
   features->pipelineCreationCacheControl = true;
   features->privateData = true;
   features->shaderDemoteToHelperInvocation = true;
   features->shaderTerminateInvocation = true;
   features->subgroupSizeControl = true;
   features->computeFullSubgroups = true;
   features->synchronization2 = true;
   features->textureCompressionASTC_HDR = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering = true;
   features->shaderIntegerDotProduct = true;
   features->maintenance4 = true;
}

void
tu_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                              VkPhysicalDeviceFeatures2 *pFeatures)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = false,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = false,
      .shaderInt16 = true,
      .sparseBinding = false,
      .variableMultisampleRate = true,
      .inheritedQueries = true,
   };

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   tu_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   tu_get_physical_device_features_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Features core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
   };
   tu_get_physical_device_features_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      if (vk_get_physical_device_core_1_1_feature_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_feature_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_feature_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features = (void *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BORDER_COLOR_SWIZZLE_FEATURES_EXT: {
         VkPhysicalDeviceBorderColorSwizzleFeaturesEXT *features = (void *)ext;
         features->borderColorSwizzle = true;
         features->borderColorSwizzleFromImage = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features = (void *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features = (void *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
         VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
            (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
         feature->performanceCounterQueryPools = true;
         feature->performanceCounterMultipleQueryPools = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
         VkPhysicalDeviceShaderFloat16Int8Features *features =
            (VkPhysicalDeviceShaderFloat16Int8Features *)ext;
         features->shaderFloat16 = true;
         features->shaderInt8 = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features = (void *)ext;
         features->scalarBlockLayout = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
         VkPhysicalDeviceTimelineSemaphoreFeatures *features =
            (VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;
         features->timelineSemaphore = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = true;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         features->stippledBresenhamLines = false;
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVE_TOPOLOGY_LIST_RESTART_FEATURES_EXT: {
         VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *features =
            (VkPhysicalDevicePrimitiveTopologyListRestartFeaturesEXT *)ext;
         features->primitiveTopologyListRestart = true;
         features->primitiveTopologyPatchListRestart = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RASTERIZATION_ORDER_ATTACHMENT_ACCESS_FEATURES_ARM: {
         VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *features =
            (VkPhysicalDeviceRasterizationOrderAttachmentAccessFeaturesARM *)ext;
         features->rasterizationOrderColorAttachmentAccess = true;
         features->rasterizationOrderDepthAttachmentAccess = true;
         features->rasterizationOrderStencilAttachmentAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipControlFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipControlFeaturesEXT *)ext;
         features->depthClipControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIMITIVES_GENERATED_QUERY_FEATURES_EXT: {
         VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *features =
            (VkPhysicalDevicePrimitivesGeneratedQueryFeaturesEXT *)ext;
         features->primitivesGeneratedQuery = true;
         features->primitivesGeneratedQueryWithRasterizerDiscard = false;
         features->primitivesGeneratedQueryWithNonZeroStreams = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_VIEW_MIN_LOD_FEATURES_EXT: {
         VkPhysicalDeviceImageViewMinLodFeaturesEXT *features =
            (VkPhysicalDeviceImageViewMinLodFeaturesEXT *)ext;
         features->minLod = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_2D_VIEW_OF_3D_FEATURES_EXT: {
         VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *features =
            (VkPhysicalDeviceImage2DViewOf3DFeaturesEXT *)ext;
         features->image2DViewOf3D = true;
         features->sampler2DViewOf3D = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_FEATURES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *features =
            (VkPhysicalDeviceShaderModuleIdentifierFeaturesEXT *)ext;
         features->shaderModuleIdentifier = true;
         break;
      }

      default:
         break;
      }
   }
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = 128;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Make sure everything is addressable by a signed 32-bit int, and
    * our largest descriptors are 96 bytes.
    */
   p->maxPerSetDescriptors = (1ull << 31) / 96;
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

/* I have no idea what the maximum size is, but the hardware supports very
 * large numbers of descriptors (at least 2^16). This limit is based on
 * CP_LOAD_STATE6, which has a 28-bit field for the DWORD offset, so that
 * we don't have to think about what to do if that overflows, but really
 * nothing is likely to get close to this.
 */
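/* Assuming A6XX_TEX_CONST_DWORDS == 16 (the a6xx descriptor size, stated
 * here as an assumption rather than repeated from the register headers),
 * this works out to (1 << 28) / 16 = 2^24 descriptors.
 */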
static const size_t max_descriptor_set_size = (1 << 28) / A6XX_TEX_CONST_DWORDS;
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16 = true;
   p->shaderDenormPreserveFloat16 = false;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = false;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = false;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = false;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = false;
   p->shaderDenormPreserveFloat64 = false;
   p->shaderRoundingModeRTEFloat64 = false;
   p->shaderRoundingModeRTZFloat64 = false;
   p->shaderSignedZeroInfNanPreserveFloat64 = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative = true;
   p->shaderSampledImageArrayNonUniformIndexingNative = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = false;
   p->quadDivergentImplicitLod = false;

   p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;

   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone = false;
   p->independentResolve = false;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan13Properties *p)
{
   /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
   p->minSubgroupSize = 64; /* threadsize_base */
   p->maxSubgroupSize = 128; /* threadsize_base * 2 */
   p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   /* VK_EXT_inline_uniform_block is not implemented */
   p->maxInlineUniformBlockSize = 0;
   p->maxPerStageDescriptorInlineUniformBlocks = 0;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = 0;
   p->maxDescriptorSetInlineUniformBlocks = 0;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = 0;
   p->maxInlineUniformTotalSize = 0;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = false;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = false;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * vkGetBufferMemoryRequirements won't fall over if someone actually
    * creates a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceProperties2 *pProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE,
      .maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = MAX_RTS,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = MAX_RTS,
      .maxVertexInputAttributes = 32,
      .maxVertexInputBindings = 32,
      .maxVertexInputAttributeOffset = 4095,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 124,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2,
      .maxComputeSharedMemorySize = 32768,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 2048,
      .maxComputeWorkGroupSize = { 1024, 1024, 1024 },
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 4095.0 / 256.0, /* [-16, 15.99609375] */
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { MAX_VIEWPORT_SIZE, MAX_VIEWPORT_SIZE },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 64,
      .minUniformBufferOffsetAlignment = 64,
      .minStorageBufferOffsetAlignment = 64,
      .minTexelOffset = -16,
      .maxTexelOffset = 15,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = 1000000000.0 / 19200000.0, /* CP_ALWAYS_ON_COUNTER runs at a fixed 19.2 MHz */
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { 1, 4092 },
      .lineWidthRange = { 1.0, 1.0 },
      .pointSizeGranularity = 0.0625,
      .lineWidthGranularity = 0.0,
      .strictLines = true,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   pProperties->properties = (VkPhysicalDeviceProperties) {
      .apiVersion = TU_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = 0x5143,
      .deviceID = pdevice->dev_id.chip_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   strcpy(pProperties->properties.deviceName, pdevice->name);
   memcpy(pProperties->properties.pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   tu_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   tu_get_physical_device_properties_1_2(pdevice, &core_1_2);

   VkPhysicalDeviceVulkan13Properties core_1_3 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_PROPERTIES,
   };
   tu_get_physical_device_properties_1_3(pdevice, &core_1_3);

   vk_foreach_struct(ext, pProperties->pNext)
   {
      if (vk_get_physical_device_core_1_1_property_ext(ext, &core_1_1))
         continue;
      if (vk_get_physical_device_core_1_2_property_ext(ext, &core_1_2))
         continue;
      if (vk_get_physical_device_core_1_3_property_ext(ext, &core_1_3))
         continue;

      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;

         properties->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = true;
         properties->transformFeedbackStreamsLinesTriangles = true;
         properties->transformFeedbackRasterizationStreamSelect = true;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;
         properties->sampleLocationSampleCounts = 0;
         if (pdevice->vk.supported_extensions.EXT_sample_locations) {
            properties->sampleLocationSampleCounts =
               VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
         }
         properties->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *props =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         props->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props = (void *)ext;
         props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_PROPERTIES_KHR: {
         VkPhysicalDevicePerformanceQueryPropertiesKHR *properties =
            (VkPhysicalDevicePerformanceQueryPropertiesKHR *)ext;
         properties->allowCommandBufferQueryCopies = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *props = (void *)ext;
         /* see write_buffer_descriptor() */
         props->robustStorageBufferAccessSizeAlignment = 4;
         /* see write_ubo_descriptor() */
         props->robustUniformBufferAccessSizeAlignment = 16;
         break;
      }

      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *properties =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         properties->provokingVertexModePerPipeline = true;
         properties->transformFeedbackPreservesTriangleFanProvokingVertex = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props =
            (VkPhysicalDeviceDrmPropertiesEXT *)ext;
         props->hasPrimary = pdevice->has_master;
         props->primaryMajor = pdevice->master_major;
         props->primaryMinor = pdevice->master_minor;

         props->hasRender = pdevice->has_local;
         props->renderMajor = pdevice->local_major;
         props->renderMinor = pdevice->local_minor;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_MODULE_IDENTIFIER_PROPERTIES_EXT: {
         VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *props =
            (VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT *)ext;
         STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                       sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         memcpy(props->shaderModuleIdentifierAlgorithmUUID,
                vk_shaderModuleIdentifierAlgorithmUUID,
                sizeof(props->shaderModuleIdentifierAlgorithmUUID));
         break;
      }
      default:
         break;
      }
   }
}

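/* Turnip exposes a single queue family with one queue, supporting graphics,
 * compute, and transfer; timestamps come from the 48-bit always-on counter
 * (see timestampPeriod above).
 */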
1368 static const VkQueueFamilyProperties tu_queue_family_properties = {
1369 .queueFlags =
1370 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1371 .queueCount = 1,
1372 .timestampValidBits = 48,
1373 .minImageTransferGranularity = { 1, 1, 1 },
1374 };
1375
1376 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,uint32_t * pQueueFamilyPropertyCount,VkQueueFamilyProperties2 * pQueueFamilyProperties)1377 tu_GetPhysicalDeviceQueueFamilyProperties2(
1378 VkPhysicalDevice physicalDevice,
1379 uint32_t *pQueueFamilyPropertyCount,
1380 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1381 {
1382 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1383 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1384
1385 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1386 {
1387 p->queueFamilyProperties = tu_queue_family_properties;
1388 }
1389 }
1390
1391 uint64_t
tu_get_system_heap_size()1392 tu_get_system_heap_size()
1393 {
1394 struct sysinfo info;
1395 sysinfo(&info);
1396
1397 uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;
1398
1399 /* We don't want to burn too much RAM on the GPU. If the user has 4GiB
1400 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1401 */
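/* Illustrative numbers (not from the source): a 2 GiB system reports a
 * 1 GiB heap, while an 8 GiB system reports 6 GiB.
 */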
1402 uint64_t available_ram;
1403 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1404 available_ram = total_ram / 2;
1405 else
1406 available_ram = total_ram * 3 / 4;
1407
1408 return available_ram;
1409 }
1410
1411 static VkDeviceSize
1412 tu_get_budget_memory(struct tu_physical_device *physical_device)
1413 {
1414 uint64_t heap_size = physical_device->heap.size;
1415 uint64_t heap_used = physical_device->heap.used;
1416 uint64_t sys_available;
1417 ASSERTED bool has_available_memory =
1418 os_get_available_system_memory(&sys_available);
1419 assert(has_available_memory);
1420
1421 /*
1422 * Let's not incite the app to starve the system: report at most 90% of
1423 * available system memory.
1424 */
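/* A hypothetical worked example: with heap_size = 6 GiB, heap_used = 1 GiB
 * and sys_available = 2 GiB, we report MIN2(6 GiB, 1 GiB + 1.8 GiB) =
 * 2.8 GiB.
 */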
1425 uint64_t heap_available = sys_available * 9 / 10;
1426 return MIN2(heap_size, heap_used + heap_available);
1427 }
1428
1429 VKAPI_ATTR void VKAPI_CALL
1430 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1431 VkPhysicalDeviceMemoryProperties2 *props2)
1432 {
1433 TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1434
1435 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1436 props->memoryHeapCount = 1;
1437 props->memoryHeaps[0].size = physical_device->heap.size;
1438 props->memoryHeaps[0].flags = physical_device->heap.flags;
1439
1440 props->memoryTypeCount = 1;
1441 props->memoryTypes[0].propertyFlags =
1442 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1443 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1444 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1445 props->memoryTypes[0].heapIndex = 0;
1446
1447 vk_foreach_struct(ext, props2->pNext)
1448 {
1449 switch (ext->sType) {
1450 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1451 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1452 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1453 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1454 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1455
1456 /* The heapBudget and heapUsage values must be zero for array elements
1457 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1458 */
1459 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1460 memory_budget_props->heapBudget[i] = 0u;
1461 memory_budget_props->heapUsage[i] = 0u;
1462 }
1463 break;
1464 }
1465 default:
1466 break;
1467 }
1468 }
1469 }
1470
1471 static VkResult
1472 tu_queue_init(struct tu_device *device,
1473 struct tu_queue *queue,
1474 int idx,
1475 const VkDeviceQueueCreateInfo *create_info)
1476 {
1477
1478 /* Match the default priority of fd_context_init. We ignore
1479 * pQueuePriorities because the spec says
1480 *
1481 * An implementation may allow a higher-priority queue to starve a
1482 * lower-priority queue on the same VkDevice until the higher-priority
1483 * queue has no further commands to execute. The relationship of queue
1484 * priorities must not cause queues on one VkDevice to starve queues on
1485 * another VkDevice.
1486 *
1487 * We cannot let one VkDevice starve another.
1488 */
1489 const int priority = 1;
1490
1491 VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1492 if (result != VK_SUCCESS)
1493 return result;
1494
1495 queue->device = device;
1496 #ifndef TU_USE_KGSL
1497 queue->vk.driver_submit = tu_queue_submit;
1498 #endif
1499
1500 int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
1501 if (ret)
1502 return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1503 "submitqueue create failed");
1504
1505 queue->fence = -1;
1506
1507 return VK_SUCCESS;
1508 }
1509
1510 static void
1511 tu_queue_finish(struct tu_queue *queue)
1512 {
1513 vk_queue_finish(&queue->vk);
1514 if (queue->fence >= 0)
1515 close(queue->fence);
1516 tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1517 }
1518
1519 uint64_t
1520 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1521 {
1522 /* This is based on the 19.2 MHz always-on rbbm timer.
1523 *
1524 * TODO: we should probably query this value from the kernel.
1525 */
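/* Note: the integer division below truncates the real tick period
 * (~52.083 ns) to 52 ns, so converted timestamps run roughly 0.16% short,
 * which is close enough for tracing purposes.
 */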
1526 return ts * (1000000000 / 19200000);
1527 }
1528
1529 static void*
1530 tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
1531 {
1532 struct tu_device *device =
1533 container_of(utctx, struct tu_device, trace_context);
1534
1535 struct tu_bo *bo;
1536 tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_NO_FLAGS);
1537
1538 return bo;
1539 }
1540
1541 static void
1542 tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
1543 {
1544 struct tu_device *device =
1545 container_of(utctx, struct tu_device, trace_context);
1546 struct tu_bo *bo = timestamps;
1547
1548 tu_bo_finish(device, bo);
1549 }
1550
1551 static void
1552 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1553 unsigned idx, bool end_of_pipe)
1554 {
1555 struct tu_bo *bo = timestamps;
1556 struct tu_cs *ts_cs = cs;
1557
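/* A reading of the packet emitted below: CP_EVENT_WRITE with the TIMESTAMP
 * flag asks the CP to write a 64-bit timestamp to the given iova once the
 * RB_DONE_TS (end-of-pipe) event retires -- one event/flags dword, a 64-bit
 * address, and a trailing value dword (unused here). tu_trace_read_ts()
 * later converts the recorded 19.2 MHz ticks to nanoseconds.
 */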
1558 unsigned ts_offset = idx * sizeof(uint64_t);
1559 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1560 tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) | CP_EVENT_WRITE_0_TIMESTAMP);
1561 tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1562 tu_cs_emit(ts_cs, 0x00000000);
1563 }
1564
1565 static uint64_t
1566 tu_trace_read_ts(struct u_trace_context *utctx,
1567 void *timestamps, unsigned idx, void *flush_data)
1568 {
1569 struct tu_device *device =
1570 container_of(utctx, struct tu_device, trace_context);
1571 struct tu_bo *bo = timestamps;
1572 struct tu_u_trace_submission_data *submission_data = flush_data;
1573
1574 /* Only need to stall on results for the first entry: */
1575 if (idx == 0) {
1576 tu_device_wait_u_trace(device, submission_data->syncobj);
1577 }
1578
1579 if (tu_bo_map(device, bo) != VK_SUCCESS) {
1580 return U_TRACE_NO_TIMESTAMP;
1581 }
1582
1583 uint64_t *ts = bo->map;
1584
1585 /* Don't translate the no-timestamp marker: */
1586 if (ts[idx] == U_TRACE_NO_TIMESTAMP)
1587 return U_TRACE_NO_TIMESTAMP;
1588
1589 return tu_device_ticks_to_ns(device, ts[idx]);
1590 }
1591
1592 static void
1593 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1594 {
1595 struct tu_device *device =
1596 container_of(utctx, struct tu_device, trace_context);
1597 struct tu_u_trace_submission_data *submission_data = flush_data;
1598
1599 tu_u_trace_submission_data_finish(device, submission_data);
1600 }
1601
1602 void
1603 tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
1604 void *ts_from, uint32_t from_offset,
1605 void *ts_to, uint32_t to_offset,
1606 uint32_t count)
1607 {
1608 struct tu_cs *cs = cmdstream;
1609 struct tu_bo *bo_from = ts_from;
1610 struct tu_bo *bo_to = ts_to;
1611
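/* CP_MEMCPY takes a length in dwords followed by 64-bit source and
 * destination addresses; from_offset/to_offset are timestamp indices,
 * hence the sizeof(uint64_t) scaling below.
 */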
1612 tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1613 tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
1614 tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
1615 tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
1616 }
1617
1618 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1619 * that ignore tracepoints at the beginning/end that are part of a
1620 * suspend/resume chain.
1621 */
1622 static struct u_trace_iterator
1623 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1624 {
1625 switch (cmdbuf->state.suspend_resume) {
1626 case SR_IN_PRE_CHAIN:
1627 return cmdbuf->trace_renderpass_end;
1628 case SR_AFTER_PRE_CHAIN:
1629 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1630 return cmdbuf->pre_chain.trace_renderpass_end;
1631 default:
1632 return u_trace_begin_iterator(&cmdbuf->trace);
1633 }
1634 }
1635
1636 static struct u_trace_iterator
1637 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1638 {
1639 switch (cmdbuf->state.suspend_resume) {
1640 case SR_IN_PRE_CHAIN:
1641 return cmdbuf->trace_renderpass_end;
1642 case SR_IN_CHAIN:
1643 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1644 return cmdbuf->trace_renderpass_start;
1645 default:
1646 return u_trace_end_iterator(&cmdbuf->trace);
1647 }
1648 }
1649 VkResult
1650 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
1651 struct u_trace **trace_copy)
1652 {
1653 *cs = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct tu_cs), 8,
1654 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1655
1656 if (*cs == NULL) {
1657 return VK_ERROR_OUT_OF_HOST_MEMORY;
1658 }
1659
1660 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1661 list_length(&cmdbuf->trace.trace_chunks) * 6 + 3);
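/* One reading of the size hint above (an inference from the constants, not
 * from documentation): one 6-dword CP_MEMCPY per trace chunk, plus 3 dwords
 * for the WFI/WAIT_FOR_ME packets emitted around the copies.
 */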
1662
1663 tu_cs_begin(*cs);
1664
1665 tu_cs_emit_wfi(*cs);
1666 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1667
1668 *trace_copy = vk_zalloc(&cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1669 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1670
1671 if (*trace_copy == NULL) {
1672 return VK_ERROR_OUT_OF_HOST_MEMORY;
1673 }
1674
1675 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1676 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
1677 tu_cmd_end_iterator(cmdbuf),
1678 *trace_copy, *cs,
1679 tu_copy_timestamp_buffer);
1680
1681 tu_cs_emit_wfi(*cs);
1682
1683 tu_cs_end(*cs);
1684
1685 return VK_SUCCESS;
1686 }
1687
1688 VkResult
1689 tu_u_trace_submission_data_create(
1690 struct tu_device *device,
1691 struct tu_cmd_buffer **cmd_buffers,
1692 uint32_t cmd_buffer_count,
1693 struct tu_u_trace_submission_data **submission_data)
1694 {
1695 *submission_data =
1696 vk_zalloc(&device->vk.alloc,
1697 sizeof(struct tu_u_trace_submission_data), 8,
1698 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1699
1700 if (!(*submission_data)) {
1701 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1702 }
1703
1704 struct tu_u_trace_submission_data *data = *submission_data;
1705
1706 data->cmd_trace_data =
1707 vk_zalloc(&device->vk.alloc,
1708 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1709 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1710
1711 if (!data->cmd_trace_data) {
1712 goto fail;
1713 }
1714
1715 data->cmd_buffer_count = cmd_buffer_count;
1716 data->last_buffer_with_tracepoints = -1;
1717
1718 for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1719 struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1720
1721 if (!u_trace_has_points(&cmdbuf->trace))
1722 continue;
1723
1724 data->last_buffer_with_tracepoints = i;
1725
1726 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1727 /* A single command buffer could be submitted several times, but we
1728 * already baked timestamp iova addresses and trace points are
1729 * single-use. Therefore we have to copy the trace points and create
1730 * a new timestamp buffer on every submit of a reusable command buffer.
1731 */
1732 if (tu_create_copy_timestamp_cs(cmdbuf,
1733 &data->cmd_trace_data[i].timestamp_copy_cs,
1734 &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1735 goto fail;
1736 }
1737
1738 assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1739 } else {
1740 data->cmd_trace_data[i].trace = &cmdbuf->trace;
1741 }
1742 }
1743
1744 assert(data->last_buffer_with_tracepoints != -1);
1745
1746 return VK_SUCCESS;
1747
1748 fail:
1749 tu_u_trace_submission_data_finish(device, data);
1750 *submission_data = NULL;
1751
1752 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1753 }
1754
1755 void
1756 tu_u_trace_submission_data_finish(
1757 struct tu_device *device,
1758 struct tu_u_trace_submission_data *submission_data)
1759 {
1760 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1761 /* Only free the trace if we had to create a copy of it */
1762 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
1763 if (cmd_data->timestamp_copy_cs) {
1764 tu_cs_finish(cmd_data->timestamp_copy_cs);
1765 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
1766
1767 u_trace_fini(cmd_data->trace);
1768 vk_free(&device->vk.alloc, cmd_data->trace);
1769 }
1770 }
1771
1772 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
1773 vk_free(&device->vk.alloc, submission_data->syncobj);
1774 vk_free(&device->vk.alloc, submission_data);
1775 }
1776
1777 VKAPI_ATTR VkResult VKAPI_CALL
1778 tu_CreateDevice(VkPhysicalDevice physicalDevice,
1779 const VkDeviceCreateInfo *pCreateInfo,
1780 const VkAllocationCallbacks *pAllocator,
1781 VkDevice *pDevice)
1782 {
1783 TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
1784 VkResult result;
1785 struct tu_device *device;
1786 bool custom_border_colors = false;
1787 bool perf_query_pools = false;
1788 bool robust_buffer_access2 = false;
1789 bool border_color_without_format = false;
1790
1791 vk_foreach_struct_const(ext, pCreateInfo->pNext) {
1792 switch (ext->sType) {
1793 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
1794 const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features = (const void *)ext;
1795 custom_border_colors = border_color_features->customBorderColors;
1796 border_color_without_format =
1797 border_color_features->customBorderColorWithoutFormat;
1798 break;
1799 }
1800 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
1801 const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
1802 (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
1803 perf_query_pools = feature->performanceCounterQueryPools;
1804 break;
1805 }
1806 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
1807 VkPhysicalDeviceRobustness2FeaturesEXT *features = (void *)ext;
1808 robust_buffer_access2 = features->robustBufferAccess2;
1809 break;
1810 }
1811 default:
1812 break;
1813 }
1814 }
1815
1816 device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator,
1817 sizeof(*device), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1818 if (!device)
1819 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1820
1821 struct vk_device_dispatch_table dispatch_table;
1822 vk_device_dispatch_table_from_entrypoints(
1823 &dispatch_table, &tu_device_entrypoints, true);
1824 vk_device_dispatch_table_from_entrypoints(
1825 &dispatch_table, &wsi_device_entrypoints, false);
1826
1827 result = vk_device_init(&device->vk, &physical_device->vk,
1828 &dispatch_table, pCreateInfo, pAllocator);
1829 if (result != VK_SUCCESS) {
1830 vk_free(&device->vk.alloc, device);
1831 return vk_startup_errorf(physical_device->instance, result,
1832 "vk_device_init failed");
1833 }
1834
1835 device->instance = physical_device->instance;
1836 device->physical_device = physical_device;
1837 device->fd = physical_device->local_fd;
1838 device->vk.check_status = tu_device_check_status;
1839
1840 mtx_init(&device->bo_mutex, mtx_plain);
1841 mtx_init(&device->pipeline_mutex, mtx_plain);
1842 mtx_init(&device->autotune_mutex, mtx_plain);
1843 u_rwlock_init(&device->dma_bo_lock);
1844 pthread_mutex_init(&device->submit_mutex, NULL);
1845
1846 #ifndef TU_USE_KGSL
1847 vk_device_set_drm_fd(&device->vk, device->fd);
1848 #endif
1849
1850 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1851 const VkDeviceQueueCreateInfo *queue_create =
1852 &pCreateInfo->pQueueCreateInfos[i];
1853 uint32_t qfi = queue_create->queueFamilyIndex;
1854 device->queues[qfi] = vk_alloc(
1855 &device->vk.alloc, queue_create->queueCount * sizeof(struct tu_queue),
1856 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1857 if (!device->queues[qfi]) {
1858 result = vk_startup_errorf(physical_device->instance,
1859 VK_ERROR_OUT_OF_HOST_MEMORY,
1860 "OOM");
1861 goto fail_queues;
1862 }
1863
1864 memset(device->queues[qfi], 0,
1865 queue_create->queueCount * sizeof(struct tu_queue));
1866
1867 device->queue_count[qfi] = queue_create->queueCount;
1868
1869 for (unsigned q = 0; q < queue_create->queueCount; q++) {
1870 result = tu_queue_init(device, &device->queues[qfi][q], q,
1871 queue_create);
1872 if (result != VK_SUCCESS)
1873 goto fail_queues;
1874 }
1875 }
1876
1877 device->compiler =
1878 ir3_compiler_create(NULL, &physical_device->dev_id,
1879 &(struct ir3_compiler_options) {
1880 .robust_buffer_access2 = robust_buffer_access2,
1881 .push_ubo_with_preamble = true,
1882 .disable_cache = true,
1883 });
1884 if (!device->compiler) {
1885 result = vk_startup_errorf(physical_device->instance,
1886 VK_ERROR_INITIALIZATION_FAILED,
1887 "failed to initialize ir3 compiler");
1888 goto fail_queues;
1889 }
1890
1891 /* Initialize sparse array for refcounting imported BOs */
1892 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
1893
1894 /* Initial sizes; these will increase if there is overflow. */
1895 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
1896 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
1897
1898 uint32_t global_size = sizeof(struct tu6_global);
1899 if (custom_border_colors)
1900 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
1901
1902 tu_bo_suballocator_init(&device->pipeline_suballoc, device,
1903 128 * 1024, TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP);
1904 tu_bo_suballocator_init(&device->autotune_suballoc, device,
1905 128 * 1024, 0);
1906
1907 result = tu_bo_init_new(device, &device->global_bo, global_size,
1908 TU_BO_ALLOC_ALLOW_DUMP);
1909 if (result != VK_SUCCESS) {
1910 vk_startup_errorf(device->instance, result, "BO init");
1911 goto fail_global_bo;
1912 }
1913
1914 result = tu_bo_map(device, device->global_bo);
1915 if (result != VK_SUCCESS) {
1916 vk_startup_errorf(device->instance, result, "BO map");
1917 goto fail_global_bo_map;
1918 }
1919
1920 struct tu6_global *global = device->global_bo->map;
1921 tu_init_clear_blit_shaders(device);
1922 global->predicate = 0;
1923 global->vtx_stats_query_not_running = 1;
1924 global->dbg_one = (uint32_t)-1;
1925 global->dbg_gmem_total_loads = 0;
1926 global->dbg_gmem_taken_loads = 0;
1927 global->dbg_gmem_total_stores = 0;
1928 global->dbg_gmem_taken_stores = 0;
1929 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
1930 VkClearColorValue border_color = vk_border_color_value(i);
1931 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
1932 vk_border_color_is_int(i));
1933 }
1934
1935 /* Initialize to all ones so ffs can be used to find unused slots. */
1936 BITSET_ONES(device->custom_border_color);
1937
1938 result = tu_init_dynamic_rendering(device);
1939 if (result != VK_SUCCESS) {
1940 vk_startup_errorf(device->instance, result, "dynamic rendering");
1941 goto fail_dynamic_rendering;
1942 }
1943
1944 struct vk_pipeline_cache_create_info pcc_info = { };
1945 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
1946 false);
1947 if (!device->mem_cache) {
1948 result = VK_ERROR_OUT_OF_HOST_MEMORY;
1949 vk_startup_errorf(device->instance, result, "create pipeline cache failed");
1950 goto fail_pipeline_cache;
1951 }
1952
1953 if (perf_query_pools) {
1954 /* Prepare 32 command streams, one per pass index, each setting
1955 * PERF_CNTRS_REG to a different bit (1 << 0 .. 1 << 31). One of these
1956 * will be picked up at command submit time when the perf query is executed.
1957 */
1958 struct tu_cs *cs;
1959
1960 if (!(device->perfcntrs_pass_cs = calloc(1, sizeof(struct tu_cs)))) {
1961 result = vk_startup_errorf(device->instance,
1962 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1963 goto fail_perfcntrs_pass_alloc;
1964 }
1965
1966 device->perfcntrs_pass_cs_entries = calloc(32, sizeof(struct tu_cs_entry));
1967 if (!device->perfcntrs_pass_cs_entries) {
1968 result = vk_startup_errorf(device->instance,
1969 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
1970 goto fail_perfcntrs_pass_entries_alloc;
1971 }
1972
1973 cs = device->perfcntrs_pass_cs;
1974 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96);
1975
1976 for (unsigned i = 0; i < 32; i++) {
1977 struct tu_cs sub_cs;
1978
1979 result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
1980 if (result != VK_SUCCESS) {
1981 vk_startup_errorf(device->instance, result,
1982 "failed to allocate commands streams");
1983 goto fail_prepare_perfcntrs_pass_cs;
1984 }
1985
1986 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
1987 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
1988
1989 device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
1990 }
1991 }
1992
1993 /* Initialize a condition variable for timeline semaphore */
1994 pthread_condattr_t condattr;
1995 if (pthread_condattr_init(&condattr) != 0) {
1996 result = vk_startup_errorf(physical_device->instance,
1997 VK_ERROR_INITIALIZATION_FAILED,
1998 "pthread condattr init");
1999 goto fail_timeline_cond;
2000 }
2001 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2002 pthread_condattr_destroy(&condattr);
2003 result = vk_startup_errorf(physical_device->instance,
2004 VK_ERROR_INITIALIZATION_FAILED,
2005 "pthread condattr clock setup");
2006 goto fail_timeline_cond;
2007 }
2008 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2009 pthread_condattr_destroy(&condattr);
2010 result = vk_startup_errorf(physical_device->instance,
2011 VK_ERROR_INITIALIZATION_FAILED,
2012 "pthread cond init");
2013 goto fail_timeline_cond;
2014 }
2015 pthread_condattr_destroy(&condattr);
2016
2017 result = tu_autotune_init(&device->autotune, device);
2018 if (result != VK_SUCCESS) {
2019 goto fail_timeline_cond;
2020 }
2021
2022 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2023 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2024
2025 mtx_init(&device->mutex, mtx_plain);
2026
2027 device->use_z24uint_s8uint =
2028 physical_device->info->a6xx.has_z24uint_s8uint &&
2029 !border_color_without_format;
2030
2031 tu_gpu_tracepoint_config_variable();
2032
2033 device->submit_count = 0;
2034 u_trace_context_init(&device->trace_context, device,
2035 tu_trace_create_ts_buffer,
2036 tu_trace_destroy_ts_buffer,
2037 tu_trace_record_ts,
2038 tu_trace_read_ts,
2039 tu_trace_delete_flush_data);
2040
2041 tu_breadcrumbs_init(device);
2042
2043 *pDevice = tu_device_to_handle(device);
2044 return VK_SUCCESS;
2045
2046 fail_timeline_cond:
2047 fail_prepare_perfcntrs_pass_cs:
2048 free(device->perfcntrs_pass_cs_entries);
2049 if (device->perfcntrs_pass_cs) tu_cs_finish(device->perfcntrs_pass_cs); /* NULL unless perf_query_pools */
2050 fail_perfcntrs_pass_entries_alloc:
2051 free(device->perfcntrs_pass_cs);
2052 fail_perfcntrs_pass_alloc:
2053 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2054 fail_pipeline_cache:
2055 tu_destroy_dynamic_rendering(device);
2056 fail_dynamic_rendering:
2057 tu_destroy_clear_blit_shaders(device);
2058 fail_global_bo_map:
2059 tu_bo_finish(device, device->global_bo);
2060 vk_free(&device->vk.alloc, device->bo_list);
2061 fail_global_bo:
2062 ir3_compiler_destroy(device->compiler);
2063 util_sparse_array_finish(&device->bo_map);
2064
2065 fail_queues:
2066 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2067 for (unsigned q = 0; q < device->queue_count[i]; q++)
2068 tu_queue_finish(&device->queues[i][q]);
2069 if (device->queue_count[i])
2070 vk_free(&device->vk.alloc, device->queues[i]);
2071 }
2072
2073 u_rwlock_destroy(&device->dma_bo_lock);
2074 vk_device_finish(&device->vk);
2075 vk_free(&device->vk.alloc, device);
2076 return result;
2077 }
2078
2079 VKAPI_ATTR void VKAPI_CALL
2080 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2081 {
2082 TU_FROM_HANDLE(tu_device, device, _device);
2083
2084 if (!device)
2085 return;
2086
2087 tu_breadcrumbs_finish(device);
2088
2089 u_trace_context_fini(&device->trace_context);
2090
2091 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2092 for (unsigned q = 0; q < device->queue_count[i]; q++)
2093 tu_queue_finish(&device->queues[i][q]);
2094 if (device->queue_count[i])
2095 vk_free(&device->vk.alloc, device->queues[i]);
2096 }
2097
2098 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2099 if (device->scratch_bos[i].initialized)
2100 tu_bo_finish(device, device->scratch_bos[i].bo);
2101 }
2102
2103 tu_destroy_clear_blit_shaders(device);
2104
2105 tu_destroy_dynamic_rendering(device);
2106
2107 ir3_compiler_destroy(device->compiler);
2108
2109 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2110
2111 if (device->perfcntrs_pass_cs) {
2112 free(device->perfcntrs_pass_cs_entries);
2113 tu_cs_finish(device->perfcntrs_pass_cs);
2114 free(device->perfcntrs_pass_cs);
2115 }
2116
2117 tu_autotune_fini(&device->autotune, device);
2118
2119 tu_bo_suballocator_finish(&device->pipeline_suballoc);
2120 tu_bo_suballocator_finish(&device->autotune_suballoc);
2121
2122 util_sparse_array_finish(&device->bo_map);
2123 u_rwlock_destroy(&device->dma_bo_lock);
2124
2125 pthread_cond_destroy(&device->timeline_cond);
2126 vk_free(&device->vk.alloc, device->bo_list);
2127 vk_device_finish(&device->vk);
2128 vk_free(&device->vk.alloc, device);
2129 }
2130
2131 VkResult
2132 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2133 {
2134 unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2135 unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2136 assert(index < ARRAY_SIZE(dev->scratch_bos));
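/* Worked example with a hypothetical MIN_SCRATCH_BO_SIZE_LOG2 of 12: a
 * 100 KiB request rounds up to 128 KiB (size_log2 = 17) and therefore maps
 * to index 5.
 */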
2137
2138 for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2139 if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2140 /* Fast path: just return the already-allocated BO. */
2141 *bo = dev->scratch_bos[i].bo;
2142 return VK_SUCCESS;
2143 }
2144 }
2145
2146 /* Slow path: actually allocate the BO. Allocation is slow, so we take a
2147 * lock to make concurrent callers wait for the result rather than race
2148 * to allocate the same BO twice.
2149 */
2150 mtx_lock(&dev->scratch_bos[index].construct_mtx);
2151
2152 /* Another thread may have allocated it already while we were waiting on
2153 * the lock. We need to check this in order to avoid double-allocating.
2154 */
2155 if (dev->scratch_bos[index].initialized) {
2156 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2157 *bo = dev->scratch_bos[index].bo;
2158 return VK_SUCCESS;
2159 }
2160
2161 unsigned bo_size = 1ull << size_log2;
2162 VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
2163 TU_BO_ALLOC_NO_FLAGS);
2164 if (result != VK_SUCCESS) {
2165 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2166 return result;
2167 }
2168
2169 p_atomic_set(&dev->scratch_bos[index].initialized, true);
2170
2171 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2172
2173 *bo = dev->scratch_bos[index].bo;
2174 return VK_SUCCESS;
2175 }
2176
2177 VKAPI_ATTR VkResult VKAPI_CALL
2178 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2179 VkLayerProperties *pProperties)
2180 {
2181 *pPropertyCount = 0;
2182 return VK_SUCCESS;
2183 }
2184
2185 /* Only used for kgsl since drm started using the common implementation. */
2186 #ifdef TU_USE_KGSL
2187 VKAPI_ATTR VkResult VKAPI_CALL
2188 tu_QueueWaitIdle(VkQueue _queue)
2189 {
2190 TU_FROM_HANDLE(tu_queue, queue, _queue);
2191
2192 if (vk_device_is_lost(&queue->device->vk))
2193 return VK_ERROR_DEVICE_LOST;
2194
2195 if (queue->fence < 0)
2196 return VK_SUCCESS;
2197
2198 struct pollfd fds = { .fd = queue->fence, .events = POLLIN };
2199 int ret;
2200 do {
2201 ret = poll(&fds, 1, -1);
2202 } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2203
2204 /* TODO: otherwise set device lost? */
2205 assert(ret == 1 && !(fds.revents & (POLLERR | POLLNVAL)));
2206
2207 close(queue->fence);
2208 queue->fence = -1;
2209 return VK_SUCCESS;
2210 }
2211 #endif
2212
2213 VKAPI_ATTR VkResult VKAPI_CALL
2214 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2215 uint32_t *pPropertyCount,
2216 VkExtensionProperties *pProperties)
2217 {
2218 if (pLayerName)
2219 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2220
2221 return vk_enumerate_instance_extension_properties(
2222 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2223 }
2224
2225 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2226 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2227 {
2228 TU_FROM_HANDLE(tu_instance, instance, _instance);
2229 return vk_instance_get_proc_addr(&instance->vk,
2230 &tu_instance_entrypoints,
2231 pName);
2232 }
2233
2234 /* The loader wants us to expose a second GetInstanceProcAddr function
2235 * to work around certain LD_PRELOAD issues seen in apps.
2236 */
2237 PUBLIC
2238 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2239 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName);
2240
2241 PUBLIC
2242 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2243 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2244 {
2245 return tu_GetInstanceProcAddr(instance, pName);
2246 }
2247
2248 /* With version 4+ of the loader interface the ICD should expose
2249 * vk_icdGetPhysicalDeviceProcAddr()
2250 */
2251 PUBLIC
2252 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2253 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
2254 const char* pName);
2255
2256 PFN_vkVoidFunction
2257 vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance,
2258 const char* pName)
2259 {
2260 TU_FROM_HANDLE(tu_instance, instance, _instance);
2261
2262 return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
2263 }
2264
2265 VKAPI_ATTR VkResult VKAPI_CALL
2266 tu_AllocateMemory(VkDevice _device,
2267 const VkMemoryAllocateInfo *pAllocateInfo,
2268 const VkAllocationCallbacks *pAllocator,
2269 VkDeviceMemory *pMem)
2270 {
2271 TU_FROM_HANDLE(tu_device, device, _device);
2272 struct tu_device_memory *mem;
2273 VkResult result;
2274
2275 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2276
2277 if (pAllocateInfo->allocationSize == 0) {
2278 /* Apparently, this is allowed */
2279 *pMem = VK_NULL_HANDLE;
2280 return VK_SUCCESS;
2281 }
2282
2283 struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2284 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2285 if (mem_heap_used > mem_heap->size)
2286 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2287
2288 mem = vk_object_alloc(&device->vk, pAllocator, sizeof(*mem),
2289 VK_OBJECT_TYPE_DEVICE_MEMORY);
2290 if (mem == NULL)
2291 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2292
2293 const VkImportMemoryFdInfoKHR *fd_info =
2294 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2295 if (fd_info && !fd_info->handleType)
2296 fd_info = NULL;
2297
2298 if (fd_info) {
2299 assert(fd_info->handleType ==
2300 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2301 fd_info->handleType ==
2302 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2303
2304 /*
2305 * TODO: Importing the same fd twice gives us the same handle without
2306 * reference counting. We need to maintain a per-instance handle-to-bo
2307 * table and add a reference count to tu_bo.
2308 */
2309 result = tu_bo_init_dmabuf(device, &mem->bo,
2310 pAllocateInfo->allocationSize, fd_info->fd);
2311 if (result == VK_SUCCESS) {
2312 /* take ownership and close the fd */
2313 close(fd_info->fd);
2314 }
2315 } else {
2316 result =
2317 tu_bo_init_new(device, &mem->bo, pAllocateInfo->allocationSize,
2318 TU_BO_ALLOC_NO_FLAGS);
2319 }
2320
2321
2322 if (result == VK_SUCCESS) {
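/* Optimistically charge the heap, then roll the charge back if it put
 * us over budget; the atomics keep the accounting consistent without
 * holding a lock.
 */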
2323 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
2324 if (mem_heap_used > mem_heap->size) {
2325 p_atomic_add(&mem_heap->used, -mem->bo->size);
2326 tu_bo_finish(device, mem->bo);
2327 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
2328 "Out of heap memory");
2329 }
2330 }
2331
2332 if (result != VK_SUCCESS) {
2333 vk_object_free(&device->vk, pAllocator, mem);
2334 return result;
2335 }
2336
2337 /* Track in the device whether our BO list contains any implicit-sync BOs, so
2338 * we can suppress implicit sync on non-WSI usage.
2339 */
2340 const struct wsi_memory_allocate_info *wsi_info =
2341 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2342 if (wsi_info && wsi_info->implicit_sync) {
2343 mtx_lock(&device->bo_mutex);
2344 if (!mem->bo->implicit_sync) {
2345 mem->bo->implicit_sync = true;
2346 device->implicit_sync_bo_count++;
2347 }
2348 mtx_unlock(&device->bo_mutex);
2349 }
2350
2351 *pMem = tu_device_memory_to_handle(mem);
2352
2353 return VK_SUCCESS;
2354 }
2355
2356 VKAPI_ATTR void VKAPI_CALL
2357 tu_FreeMemory(VkDevice _device,
2358 VkDeviceMemory _mem,
2359 const VkAllocationCallbacks *pAllocator)
2360 {
2361 TU_FROM_HANDLE(tu_device, device, _device);
2362 TU_FROM_HANDLE(tu_device_memory, mem, _mem);
2363
2364 if (mem == NULL)
2365 return;
2366
2367 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2368 tu_bo_finish(device, mem->bo);
2369 vk_object_free(&device->vk, pAllocator, mem);
2370 }
2371
2372 VKAPI_ATTR VkResult VKAPI_CALL
2373 tu_MapMemory(VkDevice _device,
2374 VkDeviceMemory _memory,
2375 VkDeviceSize offset,
2376 VkDeviceSize size,
2377 VkMemoryMapFlags flags,
2378 void **ppData)
2379 {
2380 TU_FROM_HANDLE(tu_device, device, _device);
2381 TU_FROM_HANDLE(tu_device_memory, mem, _memory);
2382 VkResult result;
2383
2384 if (mem == NULL) {
2385 *ppData = NULL;
2386 return VK_SUCCESS;
2387 }
2388
2389 if (!mem->bo->map) {
2390 result = tu_bo_map(device, mem->bo);
2391 if (result != VK_SUCCESS)
2392 return result;
2393 }
2394
2395 *ppData = mem->bo->map + offset;
2396 return VK_SUCCESS;
2397 }
2398
2399 VKAPI_ATTR void VKAPI_CALL
2400 tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2401 {
2402 /* TODO: unmap here instead of waiting for FreeMemory */
2403 }
2404
2405 VKAPI_ATTR VkResult VKAPI_CALL
2406 tu_FlushMappedMemoryRanges(VkDevice _device,
2407 uint32_t memoryRangeCount,
2408 const VkMappedMemoryRange *pMemoryRanges)
2409 {
2410 return VK_SUCCESS;
2411 }
2412
2413 VKAPI_ATTR VkResult VKAPI_CALL
2414 tu_InvalidateMappedMemoryRanges(VkDevice _device,
2415 uint32_t memoryRangeCount,
2416 const VkMappedMemoryRange *pMemoryRanges)
2417 {
2418 return VK_SUCCESS;
2419 }
2420
2421 static void
2422 tu_get_buffer_memory_requirements(uint64_t size,
2423 VkMemoryRequirements2 *pMemoryRequirements)
2424 {
2425 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2426 .memoryTypeBits = 1,
2427 .alignment = 64,
2428 .size = MAX2(align64(size, 64), size),
2429 };
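/* E.g. a 100-byte buffer reports alignment 64 and size 128 (rounded up to
 * the next 64-byte multiple); the MAX2 presumably guards against the
 * rounding overflowing for absurdly large sizes.
 */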
2430
2431 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2432 switch (ext->sType) {
2433 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2434 VkMemoryDedicatedRequirements *req =
2435 (VkMemoryDedicatedRequirements *) ext;
2436 req->requiresDedicatedAllocation = false;
2437 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2438 break;
2439 }
2440 default:
2441 break;
2442 }
2443 }
2444 }
2445
2446 VKAPI_ATTR void VKAPI_CALL
2447 tu_GetBufferMemoryRequirements2(
2448 VkDevice device,
2449 const VkBufferMemoryRequirementsInfo2 *pInfo,
2450 VkMemoryRequirements2 *pMemoryRequirements)
2451 {
2452 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2453
2454 tu_get_buffer_memory_requirements(buffer->size, pMemoryRequirements);
2455 }
2456
2457 VKAPI_ATTR void VKAPI_CALL
2458 tu_GetDeviceBufferMemoryRequirements(
2459 VkDevice device,
2460 const VkDeviceBufferMemoryRequirements *pInfo,
2461 VkMemoryRequirements2 *pMemoryRequirements)
2462 {
2463 tu_get_buffer_memory_requirements(pInfo->pCreateInfo->size, pMemoryRequirements);
2464 }
2465
2466 VKAPI_ATTR void VKAPI_CALL
2467 tu_GetDeviceMemoryCommitment(VkDevice device,
2468 VkDeviceMemory memory,
2469 VkDeviceSize *pCommittedMemoryInBytes)
2470 {
2471 *pCommittedMemoryInBytes = 0;
2472 }
2473
2474 VKAPI_ATTR VkResult VKAPI_CALL
2475 tu_BindBufferMemory2(VkDevice device,
2476 uint32_t bindInfoCount,
2477 const VkBindBufferMemoryInfo *pBindInfos)
2478 {
2479 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2480 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2481 TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
2482
2483 if (mem) {
2484 buffer->bo = mem->bo;
2485 buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2486 } else {
2487 buffer->bo = NULL;
2488 }
2489 }
2490 return VK_SUCCESS;
2491 }
2492
2493 VKAPI_ATTR VkResult VKAPI_CALL
2494 tu_BindImageMemory2(VkDevice device,
2495 uint32_t bindInfoCount,
2496 const VkBindImageMemoryInfo *pBindInfos)
2497 {
2498 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2499 TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
2500 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2501
2502 if (mem) {
2503 image->bo = mem->bo;
2504 image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2505 } else {
2506 image->bo = NULL;
2507 image->iova = 0;
2508 }
2509 }
2510
2511 return VK_SUCCESS;
2512 }
2513
2514 VKAPI_ATTR VkResult VKAPI_CALL
2515 tu_QueueBindSparse(VkQueue _queue,
2516 uint32_t bindInfoCount,
2517 const VkBindSparseInfo *pBindInfo,
2518 VkFence _fence)
2519 {
2520 return VK_SUCCESS;
2521 }
2522
2523 VKAPI_ATTR VkResult VKAPI_CALL
2524 tu_CreateEvent(VkDevice _device,
2525 const VkEventCreateInfo *pCreateInfo,
2526 const VkAllocationCallbacks *pAllocator,
2527 VkEvent *pEvent)
2528 {
2529 TU_FROM_HANDLE(tu_device, device, _device);
2530
2531 struct tu_event *event =
2532 vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
2533 VK_OBJECT_TYPE_EVENT);
2534 if (!event)
2535 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2536
2537 VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
2538 TU_BO_ALLOC_NO_FLAGS);
2539 if (result != VK_SUCCESS)
2540 goto fail_alloc;
2541
2542 result = tu_bo_map(device, event->bo);
2543 if (result != VK_SUCCESS)
2544 goto fail_map;
2545
2546 *pEvent = tu_event_to_handle(event);
2547
2548 return VK_SUCCESS;
2549
2550 fail_map:
2551 tu_bo_finish(device, event->bo);
2552 fail_alloc:
2553 vk_object_free(&device->vk, pAllocator, event);
2554 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2555 }
2556
2557 VKAPI_ATTR void VKAPI_CALL
2558 tu_DestroyEvent(VkDevice _device,
2559 VkEvent _event,
2560 const VkAllocationCallbacks *pAllocator)
2561 {
2562 TU_FROM_HANDLE(tu_device, device, _device);
2563 TU_FROM_HANDLE(tu_event, event, _event);
2564
2565 if (!event)
2566 return;
2567
2568 tu_bo_finish(device, event->bo);
2569 vk_object_free(&device->vk, pAllocator, event);
2570 }
2571
2572 VKAPI_ATTR VkResult VKAPI_CALL
2573 tu_GetEventStatus(VkDevice _device, VkEvent _event)
2574 {
2575 TU_FROM_HANDLE(tu_event, event, _event);
2576
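/* The first 64-bit word of the event BO holds the state: 1 = set,
 * 0 = reset (see tu_SetEvent()/tu_ResetEvent() below).
 */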
2577 if (*(uint64_t*) event->bo->map == 1)
2578 return VK_EVENT_SET;
2579 return VK_EVENT_RESET;
2580 }
2581
2582 VKAPI_ATTR VkResult VKAPI_CALL
2583 tu_SetEvent(VkDevice _device, VkEvent _event)
2584 {
2585 TU_FROM_HANDLE(tu_event, event, _event);
2586 *(uint64_t*) event->bo->map = 1;
2587
2588 return VK_SUCCESS;
2589 }
2590
2591 VKAPI_ATTR VkResult VKAPI_CALL
2592 tu_ResetEvent(VkDevice _device, VkEvent _event)
2593 {
2594 TU_FROM_HANDLE(tu_event, event, _event);
2595 *(uint64_t*) event->bo->map = 0;
2596
2597 return VK_SUCCESS;
2598 }
2599
2600 VKAPI_ATTR VkResult VKAPI_CALL
2601 tu_CreateBuffer(VkDevice _device,
2602 const VkBufferCreateInfo *pCreateInfo,
2603 const VkAllocationCallbacks *pAllocator,
2604 VkBuffer *pBuffer)
2605 {
2606 TU_FROM_HANDLE(tu_device, device, _device);
2607 struct tu_buffer *buffer;
2608
2609 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2610
2611 buffer = vk_object_alloc(&device->vk, pAllocator, sizeof(*buffer),
2612 VK_OBJECT_TYPE_BUFFER);
2613 if (buffer == NULL)
2614 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2615
2616 buffer->size = pCreateInfo->size;
2617 buffer->usage = pCreateInfo->usage;
2618 buffer->flags = pCreateInfo->flags;
2619
2620 *pBuffer = tu_buffer_to_handle(buffer);
2621
2622 return VK_SUCCESS;
2623 }
2624
2625 VKAPI_ATTR void VKAPI_CALL
2626 tu_DestroyBuffer(VkDevice _device,
2627 VkBuffer _buffer,
2628 const VkAllocationCallbacks *pAllocator)
2629 {
2630 TU_FROM_HANDLE(tu_device, device, _device);
2631 TU_FROM_HANDLE(tu_buffer, buffer, _buffer);
2632
2633 if (!buffer)
2634 return;
2635
2636 vk_object_free(&device->vk, pAllocator, buffer);
2637 }
2638
2639 VKAPI_ATTR VkResult VKAPI_CALL
2640 tu_CreateFramebuffer(VkDevice _device,
2641 const VkFramebufferCreateInfo *pCreateInfo,
2642 const VkAllocationCallbacks *pAllocator,
2643 VkFramebuffer *pFramebuffer)
2644 {
2645 TU_FROM_HANDLE(tu_device, device, _device);
2646
2647 if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
2648 return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
2649 pFramebuffer);
2650
2651 TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
2652 struct tu_framebuffer *framebuffer;
2653
2654 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2655
2656 bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
2657
2658 size_t size = sizeof(*framebuffer);
2659 if (!imageless)
2660 size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
2661 framebuffer = vk_object_alloc(&device->vk, pAllocator, size,
2662 VK_OBJECT_TYPE_FRAMEBUFFER);
2663 if (framebuffer == NULL)
2664 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2665
2666 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2667 framebuffer->width = pCreateInfo->width;
2668 framebuffer->height = pCreateInfo->height;
2669 framebuffer->layers = pCreateInfo->layers;
2670
2671 if (!imageless) {
2672 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
2673 VkImageView _iview = pCreateInfo->pAttachments[i];
2674 struct tu_image_view *iview = tu_image_view_from_handle(_iview);
2675 framebuffer->attachments[i].attachment = iview;
2676 }
2677 }
2678
2679 tu_framebuffer_tiling_config(framebuffer, device, pass);
2680
2681 *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
2682 return VK_SUCCESS;
2683 }
2684
2685 void
2686 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
2687 const VkRenderingInfo *pRenderingInfo)
2688 {
2689 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
2690 struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
2691
2692 framebuffer->attachment_count = pass->attachment_count;
2693 framebuffer->width = pRenderingInfo->renderArea.offset.x +
2694 pRenderingInfo->renderArea.extent.width;
2695 framebuffer->height = pRenderingInfo->renderArea.offset.y +
2696 pRenderingInfo->renderArea.extent.height;
2697 framebuffer->layers = pRenderingInfo->layerCount;
2698
2699 tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
2700 }
2701
2702 VKAPI_ATTR void VKAPI_CALL
2703 tu_DestroyFramebuffer(VkDevice _device,
2704 VkFramebuffer _fb,
2705 const VkAllocationCallbacks *pAllocator)
2706 {
2707 TU_FROM_HANDLE(tu_device, device, _device);
2708
2709 if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
2710 vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
2711 return;
2712 }
2713
2714 TU_FROM_HANDLE(tu_framebuffer, fb, _fb);
2715
2716 if (!fb)
2717 return;
2718
2719 vk_object_free(&device->vk, pAllocator, fb);
2720 }
2721
2722 static void
2723 tu_init_sampler(struct tu_device *device,
2724 struct tu_sampler *sampler,
2725 const VkSamplerCreateInfo *pCreateInfo)
2726 {
2727 const struct VkSamplerReductionModeCreateInfo *reduction =
2728 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
2729 const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
2730 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
2731 const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
2732 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
2733 /* For non-custom border colors, the VK enum is translated directly to an offset in
2734 * the border color buffer. Custom border colors are located immediately after the
2735 * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added.
2736 */
2737 uint32_t border_color = (unsigned) pCreateInfo->borderColor;
2738 if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
2739 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
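/* Slot allocation sketch: custom_border_color starts as all ones, so
 * BITSET_FFS() - 1 is the first free index and clearing the bit claims it;
 * tu_DestroySampler() sets the bit again to release the slot.
 */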
2740 mtx_lock(&device->mutex);
2741 border_color = BITSET_FFS(device->custom_border_color) - 1;
2742 assert(border_color < TU_BORDER_COLOR_COUNT);
2743 BITSET_CLEAR(device->custom_border_color, border_color);
2744 mtx_unlock(&device->mutex);
2745
2746 VkClearColorValue color = custom_border_color->customBorderColor;
2747 if (custom_border_color->format == VK_FORMAT_D24_UNORM_S8_UINT &&
2748 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT &&
2749 device->use_z24uint_s8uint) {
2750 /* When sampling stencil using the special Z24UINT_S8UINT format, the
2751 * border color is in the second component. Note: if
2752 * customBorderColorWithoutFormat is enabled, we may miss doing this
2753 * here if the format isn't specified, which is why we don't use that
2754 * format.
2755 */
2756 color.uint32[1] = color.uint32[0];
2757 }
2758
2759 tu6_pack_border_color(device->global_bo->map + gb_offset(bcolor[border_color]),
2760 &color,
2761 pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
2762 border_color += TU_BORDER_COLOR_BUILTIN;
2763 }
2764
2765 unsigned aniso = pCreateInfo->anisotropyEnable ?
2766 util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
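/* The ANISO field appears to be log2-encoded (0 = 1x .. 4 = 16x); the shift
 * plus util_last_bit() above maps maxAnisotropy onto that scale, clamped to
 * 16x. (An inference from the arithmetic, not from hardware docs.)
 */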
2767 bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
2768 float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
2769 float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);
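/* The 4095/256 bound suggests the LOD fields take unsigned 4.8 fixed point
 * (max ~15.996); an assumption drawn from the constant, not from docs.
 */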
2770
2771 sampler->descriptor[0] =
2772 COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
2773 A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
2774 A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
2775 A6XX_TEX_SAMP_0_ANISO(aniso) |
2776 A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
2777 A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
2778 A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
2779 A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
2780 sampler->descriptor[1] =
2781 /* COND(!cso->seamless_cube_map, A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) | */
2782 COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
2783 A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
2784 A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
2785 COND(pCreateInfo->compareEnable,
2786 A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
2787 sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
2788 sampler->descriptor[3] = 0;
2789
2790 if (reduction) {
2791 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
2792 tu6_reduction_mode(reduction->reductionMode));
2793 }
2794
2795 sampler->ycbcr_sampler = ycbcr_conversion ?
2796 tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;
2797
2798 if (sampler->ycbcr_sampler &&
2799 sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
2800 sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
2801 }
2802
2803 /* TODO:
2804 * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
2805 */
2806 }
2807
2808 VKAPI_ATTR VkResult VKAPI_CALL
2809 tu_CreateSampler(VkDevice _device,
2810 const VkSamplerCreateInfo *pCreateInfo,
2811 const VkAllocationCallbacks *pAllocator,
2812 VkSampler *pSampler)
2813 {
2814 TU_FROM_HANDLE(tu_device, device, _device);
2815 struct tu_sampler *sampler;
2816
2817 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2818
2819 sampler = vk_object_alloc(&device->vk, pAllocator, sizeof(*sampler),
2820 VK_OBJECT_TYPE_SAMPLER);
2821 if (!sampler)
2822 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2823
2824 tu_init_sampler(device, sampler, pCreateInfo);
2825 *pSampler = tu_sampler_to_handle(sampler);
2826
2827 return VK_SUCCESS;
2828 }
2829
2830 VKAPI_ATTR void VKAPI_CALL
2831 tu_DestroySampler(VkDevice _device,
2832 VkSampler _sampler,
2833 const VkAllocationCallbacks *pAllocator)
2834 {
2835 TU_FROM_HANDLE(tu_device, device, _device);
2836 TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
2837 uint32_t border_color;
2838
2839 if (!sampler)
2840 return;
2841
2842 border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
2843 if (border_color >= TU_BORDER_COLOR_BUILTIN) {
2844 border_color -= TU_BORDER_COLOR_BUILTIN;
2845 /* If the sampler had a custom border color, free its slot. TODO: do this without the lock */
2846 mtx_lock(&device->mutex);
2847 assert(!BITSET_TEST(device->custom_border_color, border_color));
2848 BITSET_SET(device->custom_border_color, border_color);
2849 mtx_unlock(&device->mutex);
2850 }
2851
2852 vk_object_free(&device->vk, pAllocator, sampler);
2853 }
2854
2855 /* vk_icd.h does not declare this function, so we declare it here to
2856 * suppress Wmissing-prototypes.
2857 */
2858 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2859 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);
2860
2861 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
2862 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
2863 {
2864 /* For the full details on loader interface versioning, see
2865 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
2866 * What follows is a condensed summary, to help you navigate the large and
2867 * confusing official doc.
2868 *
2869 * - Loader interface v0 is incompatible with later versions. We don't
2870 * support it.
2871 *
2872 * - In loader interface v1:
2873 * - The first ICD entrypoint called by the loader is
2874 * vk_icdGetInstanceProcAddr(). The ICD must statically expose this
2875 * entrypoint.
2876 * - The ICD must statically expose no other Vulkan symbol unless it
2877 * is linked with -Bsymbolic.
2878 * - Each dispatchable Vulkan handle created by the ICD must be
2879 * a pointer to a struct whose first member is VK_LOADER_DATA. The
2880 * ICD must initialize VK_LOADER_DATA.loadMagic to
2881 * ICD_LOADER_MAGIC.
2882 * - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
2883 * vkDestroySurfaceKHR(). The ICD must be capable of working with
2884 * such loader-managed surfaces.
2885 *
2886 * - Loader interface v2 differs from v1 in:
2887 * - The first ICD entrypoint called by the loader is
2888 * vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
2889 * statically expose this entrypoint.
2890 *
2891 * - Loader interface v3 differs from v2 in:
2892 * - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
2893 * vkDestroySurfaceKHR(), and other API that uses VkSurfaceKHR,
2894 * because the loader no longer does so.
2895 *
2896 * - Loader interface v4 differs from v3 in:
2897 * - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
2898 *
2899 * - Loader interface v5 differs from v4 in:
2900 * - The ICD must support Vulkan API version 1.1 and must not return
2901 * VK_ERROR_INCOMPATIBLE_DRIVER from vkCreateInstance() unless a
2902 * Vulkan Loader with interface v4 or smaller is being used and the
2903 * application provides an API version that is greater than 1.0.
2904 */
2905 *pSupportedVersion = MIN2(*pSupportedVersion, 5u);
2906 return VK_SUCCESS;
2907 }
2908
2909 VKAPI_ATTR VkResult VKAPI_CALL
2910 tu_GetMemoryFdKHR(VkDevice _device,
2911 const VkMemoryGetFdInfoKHR *pGetFdInfo,
2912 int *pFd)
2913 {
2914 TU_FROM_HANDLE(tu_device, device, _device);
2915 TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
2916
2917 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2918
2919 /* At the moment, we support only the below handle types. */
2920 assert(pGetFdInfo->handleType ==
2921 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2922 pGetFdInfo->handleType ==
2923 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2924
2925 int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
2926 if (prime_fd < 0)
2927 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2928
2929 *pFd = prime_fd;
2930 return VK_SUCCESS;
2931 }
2932
2933 VKAPI_ATTR VkResult VKAPI_CALL
2934 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
2935 VkExternalMemoryHandleTypeFlagBits handleType,
2936 int fd,
2937 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2938 {
2939 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2940 pMemoryFdProperties->memoryTypeBits = 1;
2941 return VK_SUCCESS;
2942 }
2943
2944 VKAPI_ATTR void VKAPI_CALL
2945 tu_GetPhysicalDeviceExternalFenceProperties(
2946 VkPhysicalDevice physicalDevice,
2947 const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
2948 VkExternalFenceProperties *pExternalFenceProperties)
2949 {
2950 pExternalFenceProperties->exportFromImportedHandleTypes = 0;
2951 pExternalFenceProperties->compatibleHandleTypes = 0;
2952 pExternalFenceProperties->externalFenceFeatures = 0;
2953 }
2954
2955 VKAPI_ATTR void VKAPI_CALL
2956 tu_GetDeviceGroupPeerMemoryFeatures(
2957 VkDevice device,
2958 uint32_t heapIndex,
2959 uint32_t localDeviceIndex,
2960 uint32_t remoteDeviceIndex,
2961 VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
2962 {
2963 assert(localDeviceIndex == remoteDeviceIndex);
2964
2965 *pPeerMemoryFeatures = VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT |
2966 VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
2967 VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT |
2968 VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
2969 }
2970
2971 VKAPI_ATTR void VKAPI_CALL
2972 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
2973 VkPhysicalDevice physicalDevice,
2974 VkSampleCountFlagBits samples,
2975 VkMultisamplePropertiesEXT* pMultisampleProperties)
2976 {
2977 TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
2978
2979 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
2980 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
2981 else
2982 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
2983 }
2984
2985 VkDeviceAddress
2986 tu_GetBufferDeviceAddress(VkDevice _device,
2987 const VkBufferDeviceAddressInfo* pInfo)
2988 {
2989 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2990
2991 return buffer->iova;
2992 }
2993
2994 uint64_t tu_GetBufferOpaqueCaptureAddress(
2995 VkDevice device,
2996 const VkBufferDeviceAddressInfo* pInfo)
2997 {
2998 tu_stub();
2999 return 0;
3000 }
3001
3002 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3003 VkDevice device,
3004 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3005 {
3006 tu_stub();
3007 return 0;
3008 }
3009