1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 */
9
10 #include "tu_device.h"
11
12 #include "drm-uapi/drm_fourcc.h"
13 #include "fdl/freedreno_layout.h"
14 #include <fcntl.h>
15 #include <poll.h>
16
17 #include "git_sha1.h"
18 #include "util/u_debug.h"
19 #include "util/disk_cache.h"
20 #include "util/hex.h"
21 #include "util/driconf.h"
22 #include "util/os_misc.h"
23 #include "util/u_process.h"
24 #include "vk_android.h"
25 #include "vk_shader_module.h"
26 #include "vk_sampler.h"
27 #include "vk_util.h"
28
29 /* for fd_get_driver/device_uuid() */
30 #include "freedreno/common/freedreno_uuid.h"
31 #include "freedreno/common/freedreno_stompable_regs.h"
32
33 #include "tu_acceleration_structure.h"
34 #include "tu_clear_blit.h"
35 #include "tu_cmd_buffer.h"
36 #include "tu_cs.h"
37 #include "tu_descriptor_set.h"
38 #include "tu_dynamic_rendering.h"
39 #include "tu_image.h"
40 #include "tu_pass.h"
41 #include "tu_queue.h"
42 #include "tu_query_pool.h"
43 #include "tu_rmv.h"
44 #include "tu_tracepoints.h"
45 #include "tu_wsi.h"
46
47 #if DETECT_OS_ANDROID
48 #include "util/u_gralloc/u_gralloc.h"
49 #include <vndk/hardware_buffer.h>
50 #endif
51
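/* Default to a 4 KiB page size; tu_get_properties() refreshes this via
 * os_get_page_size() before reporting the VK_EXT_map_memory_placed
 * alignment.
 */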
52 uint64_t os_page_size = 4096;
53
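/* Derive the pipeline cache UUID from the driver build (via the disk-cache
 * identifier of this function's code), the GPU family, and the debug flags
 * that change code generation, so a stale cache is never reused.
 */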
54 static int
55 tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
56 {
57 struct mesa_sha1 ctx;
58 unsigned char sha1[20];
59 /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
60 * initialized until after compiler creation so we have to add it to the
61 * shader hash instead, since the compiler is only created with the logical
62 * device.
63 */
64 uint64_t driver_flags = TU_DEBUG(NOMULTIPOS);
65 uint16_t family = fd_dev_gpu_id(&device->dev_id);
66
67 memset(uuid, 0, VK_UUID_SIZE);
68 _mesa_sha1_init(&ctx);
69
70 if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
71 return -1;
72
73 _mesa_sha1_update(&ctx, &family, sizeof(family));
74 _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
75 _mesa_sha1_final(&ctx, sha1);
76
77 memcpy(uuid, sha1, VK_UUID_SIZE);
78 return 0;
79 }
80
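/* Instance-level version: Vulkan 1.4 with the current header's patch level.
 * The per-device apiVersion reported from tu_get_properties() may still be
 * lower depending on hardware support.
 */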
81 #define TU_API_VERSION VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION)
82
83 VKAPI_ATTR VkResult VKAPI_CALL
84 tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
85 {
86 *pApiVersion = TU_API_VERSION;
87 return VK_SUCCESS;
88 }
89
90 static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
91 .KHR_device_group_creation = true,
92 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
93 .KHR_display = true,
94 #endif
95 .KHR_external_fence_capabilities = true,
96 .KHR_external_memory_capabilities = true,
97 .KHR_external_semaphore_capabilities = true,
98 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
99 .KHR_get_display_properties2 = true,
100 #endif
101 .KHR_get_physical_device_properties2 = true,
102 #ifdef TU_USE_WSI_PLATFORM
103 .KHR_get_surface_capabilities2 = true,
104 .KHR_surface = true,
105 .KHR_surface_protected_capabilities = true,
106 #endif
107 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
108 .KHR_wayland_surface = true,
109 #endif
110 #ifdef VK_USE_PLATFORM_XCB_KHR
111 .KHR_xcb_surface = true,
112 #endif
113 #ifdef VK_USE_PLATFORM_XLIB_KHR
114 .KHR_xlib_surface = true,
115 #endif
116 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
117 .EXT_acquire_drm_display = true,
118 #endif
119 #ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
120 .EXT_acquire_xlib_display = true,
121 #endif
122 .EXT_debug_report = true,
123 .EXT_debug_utils = true,
124 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
125 .EXT_direct_mode_display = true,
126 .EXT_display_surface_counter = true,
127 #endif
128 #ifndef VK_USE_PLATFORM_WIN32_KHR
129 .EXT_headless_surface = true,
130 #endif
131 #ifdef TU_USE_WSI_PLATFORM
132 .EXT_surface_maintenance1 = true,
133 .EXT_swapchain_colorspace = true,
134 #endif
135 } };
136
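/* True when the kernel interface is Android's kgsl rather than a DRM driver. */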
137 static bool
138 is_kgsl(struct tu_instance *instance)
139 {
140 return strcmp(instance->knl->name, "kgsl") == 0;
141 }
142
143 static void
144 get_device_extensions(const struct tu_physical_device *device,
145 struct vk_device_extension_table *ext)
146 {
147 /* device->has_raytracing contains the value of the SW fuse. If the
148 * device doesn't have a fuse (i.e. a740), we have to ignore it because
149 * kgsl returns false. If it does have a fuse, enable raytracing if the
150 * fuse is set and we have ray_intersection.
151 */
152 bool has_raytracing =
153 device->info->a7xx.has_ray_intersection &&
154 (!device->info->a7xx.has_sw_fuse || device->has_raytracing);
155
156 *ext = (struct vk_device_extension_table) { .table = {
157 .KHR_8bit_storage = device->info->a7xx.storage_8bit,
158 .KHR_16bit_storage = device->info->a6xx.storage_16bit,
159 .KHR_acceleration_structure = has_raytracing,
160 .KHR_bind_memory2 = true,
161 .KHR_buffer_device_address = true,
162 .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
163 .KHR_compute_shader_derivatives = device->info->chip >= 7,
164 .KHR_copy_commands2 = true,
165 .KHR_create_renderpass2 = true,
166 .KHR_dedicated_allocation = true,
167 .KHR_deferred_host_operations = true,
168 .KHR_depth_stencil_resolve = true,
169 .KHR_descriptor_update_template = true,
170 .KHR_device_group = true,
171 .KHR_draw_indirect_count = true,
172 .KHR_driver_properties = true,
173 .KHR_dynamic_rendering = true,
174 .KHR_dynamic_rendering_local_read = true,
175 .KHR_external_fence = true,
176 .KHR_external_fence_fd = true,
177 .KHR_external_memory = true,
178 .KHR_external_memory_fd = true,
179 .KHR_external_semaphore = true,
180 .KHR_external_semaphore_fd = true,
181 .KHR_format_feature_flags2 = true,
182 .KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
183 .KHR_get_memory_requirements2 = true,
184 .KHR_global_priority = true,
185 .KHR_image_format_list = true,
186 .KHR_imageless_framebuffer = true,
187 #ifdef TU_USE_WSI_PLATFORM
188 .KHR_incremental_present = true,
189 #endif
190 .KHR_index_type_uint8 = true,
191 .KHR_line_rasterization = true,
192 .KHR_load_store_op_none = true,
193 .KHR_maintenance1 = true,
194 .KHR_maintenance2 = true,
195 .KHR_maintenance3 = true,
196 .KHR_maintenance4 = true,
197 .KHR_maintenance5 = true,
198 .KHR_maintenance6 = true,
199 .KHR_map_memory2 = true,
200 .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
201 .KHR_performance_query = TU_DEBUG(PERFC),
202 .KHR_pipeline_executable_properties = true,
203 .KHR_pipeline_library = true,
204 #ifdef TU_USE_WSI_PLATFORM
205 /* Hide these behind dri configs for now since we cannot implement it reliably on
206 * all surfaces yet. There is no surface capability query for present wait/id,
207 * but the feature is useful enough to hide behind an opt-in mechanism for now.
208 * If the instance only enables surface extensions that unconditionally support present wait,
209 * we can also expose the extension that way. */
210 .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
211 wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
212 .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
213 wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
214 #endif
215 .KHR_push_descriptor = true,
216 .KHR_ray_query = has_raytracing,
217 .KHR_ray_tracing_maintenance1 = has_raytracing,
218 .KHR_relaxed_block_layout = true,
219 .KHR_sampler_mirror_clamp_to_edge = true,
220 .KHR_sampler_ycbcr_conversion = true,
221 .KHR_separate_depth_stencil_layouts = true,
222 .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
223 .KHR_shader_draw_parameters = true,
224 .KHR_shader_expect_assume = true,
225 .KHR_shader_float16_int8 = true,
226 .KHR_shader_float_controls = true,
227 .KHR_shader_float_controls2 = true,
228 .KHR_shader_integer_dot_product = true,
229 .KHR_shader_non_semantic_info = true,
230 .KHR_shader_relaxed_extended_instruction = true,
231 .KHR_shader_subgroup_extended_types = true,
232 .KHR_shader_subgroup_rotate = true,
233 .KHR_shader_subgroup_uniform_control_flow = true,
234 .KHR_shader_terminate_invocation = true,
235 .KHR_spirv_1_4 = true,
236 .KHR_storage_buffer_storage_class = true,
237 #ifdef TU_USE_WSI_PLATFORM
238 .KHR_swapchain = true,
239 .KHR_swapchain_mutable_format = true,
240 #endif
241 .KHR_synchronization2 = true,
242 .KHR_timeline_semaphore = true,
243 .KHR_uniform_buffer_standard_layout = true,
244 .KHR_variable_pointers = true,
245 .KHR_vertex_attribute_divisor = true,
246 .KHR_vulkan_memory_model = true,
247 .KHR_workgroup_memory_explicit_layout = true,
248 .KHR_zero_initialize_workgroup_memory = true,
249
250 .EXT_4444_formats = true,
251 .EXT_attachment_feedback_loop_dynamic_state = true,
252 .EXT_attachment_feedback_loop_layout = true,
253 .EXT_border_color_swizzle = true,
254 .EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
255 .EXT_color_write_enable = true,
256 .EXT_conditional_rendering = true,
257 .EXT_conservative_rasterization = device->info->chip >= 7,
258 .EXT_custom_border_color = true,
259 .EXT_depth_clamp_zero_one = true,
260 .EXT_depth_clip_control = true,
261 .EXT_depth_clip_enable = true,
262 .EXT_descriptor_buffer = true,
263 .EXT_descriptor_indexing = true,
264 .EXT_device_address_binding_report = true,
265 #ifdef VK_USE_PLATFORM_DISPLAY_KHR
266 .EXT_display_control = true,
267 #endif
268 .EXT_extended_dynamic_state = true,
269 .EXT_extended_dynamic_state2 = true,
270 .EXT_extended_dynamic_state3 = true,
271 .EXT_external_memory_dma_buf = true,
272 .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
273 .EXT_fragment_density_map = true,
274 .EXT_global_priority = true,
275 .EXT_global_priority_query = true,
276 .EXT_graphics_pipeline_library = true,
277 .EXT_host_image_copy = true,
278 .EXT_host_query_reset = true,
279 .EXT_image_2d_view_of_3d = true,
280 .EXT_image_drm_format_modifier = true,
281 .EXT_image_robustness = true,
282 .EXT_image_view_min_lod = true,
283 .EXT_index_type_uint8 = true,
284 .EXT_inline_uniform_block = true,
285 .EXT_legacy_dithering = true,
286 .EXT_legacy_vertex_attributes = true,
287 .EXT_line_rasterization = true,
288 .EXT_load_store_op_none = true,
289 .EXT_map_memory_placed = true,
290 .EXT_memory_budget = true,
291 .EXT_multi_draw = true,
292 .EXT_mutable_descriptor_type = true,
293 .EXT_nested_command_buffer = true,
294 .EXT_non_seamless_cube_map = true,
295 .EXT_physical_device_drm = !is_kgsl(device->instance),
296 .EXT_pipeline_creation_cache_control = true,
297 .EXT_pipeline_creation_feedback = true,
298 .EXT_post_depth_coverage = true,
299 .EXT_primitive_topology_list_restart = true,
300 .EXT_primitives_generated_query = true,
301 .EXT_private_data = true,
302 .EXT_provoking_vertex = true,
303 .EXT_queue_family_foreign = true,
304 .EXT_rasterization_order_attachment_access = true,
305 .EXT_robustness2 = true,
306 .EXT_sample_locations = device->info->a6xx.has_sample_locations,
307 .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
308 .EXT_scalar_block_layout = true,
309 .EXT_separate_stencil_usage = true,
310 .EXT_shader_demote_to_helper_invocation = true,
311 .EXT_shader_module_identifier = true,
312 .EXT_shader_replicated_composites = true,
313 .EXT_shader_stencil_export = true,
314 .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
315 .EXT_subgroup_size_control = true,
316 #ifdef TU_USE_WSI_PLATFORM
317 .EXT_swapchain_maintenance1 = true,
318 #endif
319 .EXT_texel_buffer_alignment = true,
320 .EXT_tooling_info = true,
321 .EXT_transform_feedback = true,
322 .EXT_vertex_attribute_divisor = true,
323 .EXT_vertex_input_dynamic_state = true,
324
325 /* For Graphics Flight Recorder (GFR) */
326 .AMD_buffer_marker = true,
327 .ARM_rasterization_order_attachment_access = true,
328 .GOOGLE_decorate_string = true,
329 .GOOGLE_hlsl_functionality1 = true,
330 .GOOGLE_user_type = true,
331 .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
332 .NV_compute_shader_derivatives = device->info->chip >= 7,
333 .VALVE_mutable_descriptor_type = true,
334 } };
335
336 #if DETECT_OS_ANDROID
337 if (vk_android_get_ugralloc() != NULL) {
338 ext->ANDROID_external_memory_android_hardware_buffer = true;
339 ext->ANDROID_native_buffer = true;
340 }
341 #endif
342 }
343
344 static void
345 tu_get_features(struct tu_physical_device *pdevice,
346 struct vk_features *features)
347 {
348 *features = (struct vk_features) { false };
349
350 /* Vulkan 1.0 */
351 features->robustBufferAccess = true;
352 features->fullDrawIndexUint32 = true;
353 features->imageCubeArray = true;
354 features->independentBlend = true;
355 features->geometryShader = true;
356 features->tessellationShader = true;
357 features->sampleRateShading = true;
358 features->dualSrcBlend = true;
359 features->logicOp = true;
360 features->multiDrawIndirect = true;
361 features->drawIndirectFirstInstance = true;
362 features->depthClamp = true;
363 features->depthBiasClamp = true;
364 features->fillModeNonSolid = true;
365 features->depthBounds = true;
366 features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
367 features->largePoints = true;
368 features->alphaToOne = true;
369 features->multiViewport = true;
370 features->samplerAnisotropy = true;
371 features->textureCompressionETC2 = true;
372 features->textureCompressionASTC_LDR = true;
373 features->textureCompressionBC = true;
374 features->occlusionQueryPrecise = true;
375 features->pipelineStatisticsQuery = true;
376 features->vertexPipelineStoresAndAtomics = true;
377 features->fragmentStoresAndAtomics = true;
378 features->shaderTessellationAndGeometryPointSize = true;
379 features->shaderImageGatherExtended = true;
380 features->shaderStorageImageExtendedFormats = true;
381 features->shaderStorageImageMultisample = false;
382 features->shaderStorageImageReadWithoutFormat = true;
383 features->shaderStorageImageWriteWithoutFormat = true;
384 features->shaderUniformBufferArrayDynamicIndexing = true;
385 features->shaderSampledImageArrayDynamicIndexing = true;
386 features->shaderStorageBufferArrayDynamicIndexing = true;
387 features->shaderStorageImageArrayDynamicIndexing = true;
388 features->shaderClipDistance = true;
389 features->shaderCullDistance = true;
390 features->shaderFloat64 = false;
391 features->shaderInt64 = true;
392 features->shaderInt16 = true;
393 features->sparseBinding = false;
394 features->variableMultisampleRate = true;
395 features->inheritedQueries = true;
396
397 /* Vulkan 1.1 */
398 features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
399 features->uniformAndStorageBuffer16BitAccess = false;
400 features->storagePushConstant16 = false;
401 features->storageInputOutput16 = false;
402 features->multiview = true;
403 features->multiviewGeometryShader = false;
404 features->multiviewTessellationShader = false;
405 features->variablePointersStorageBuffer = true;
406 features->variablePointers = true;
407 features->protectedMemory = false;
408 features->samplerYcbcrConversion = true;
409 features->shaderDrawParameters = true;
410
411 /* Vulkan 1.2 */
412 features->samplerMirrorClampToEdge = true;
413 features->drawIndirectCount = true;
414 features->storageBuffer8BitAccess = pdevice->info->a7xx.storage_8bit;
415 features->uniformAndStorageBuffer8BitAccess = false;
416 features->storagePushConstant8 = false;
417 features->shaderBufferInt64Atomics =
418 pdevice->info->a7xx.has_64b_ssbo_atomics;
419 features->shaderSharedInt64Atomics = false;
420 features->shaderFloat16 = true;
421 features->shaderInt8 = true;
422
423 features->descriptorIndexing = true;
424 features->shaderInputAttachmentArrayDynamicIndexing = false;
425 features->shaderUniformTexelBufferArrayDynamicIndexing = true;
426 features->shaderStorageTexelBufferArrayDynamicIndexing = true;
427 features->shaderUniformBufferArrayNonUniformIndexing = true;
428 features->shaderSampledImageArrayNonUniformIndexing = true;
429 features->shaderStorageBufferArrayNonUniformIndexing = true;
430 features->shaderStorageImageArrayNonUniformIndexing = true;
431 features->shaderInputAttachmentArrayNonUniformIndexing = false;
432 features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
433 features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
434 features->descriptorBindingUniformBufferUpdateAfterBind = true;
435 features->descriptorBindingSampledImageUpdateAfterBind = true;
436 features->descriptorBindingStorageImageUpdateAfterBind = true;
437 features->descriptorBindingStorageBufferUpdateAfterBind = true;
438 features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
439 features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
440 features->descriptorBindingUpdateUnusedWhilePending = true;
441 features->descriptorBindingPartiallyBound = true;
442 features->descriptorBindingVariableDescriptorCount = true;
443 features->runtimeDescriptorArray = true;
444
445 features->samplerFilterMinmax =
446 pdevice->info->a6xx.has_sampler_minmax;
447 features->scalarBlockLayout = true;
448 features->imagelessFramebuffer = true;
449 features->uniformBufferStandardLayout = true;
450 features->shaderSubgroupExtendedTypes = true;
451 features->separateDepthStencilLayouts = true;
452 features->hostQueryReset = true;
453 features->timelineSemaphore = true;
454 features->bufferDeviceAddress = true;
455 features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova;
456 features->bufferDeviceAddressMultiDevice = false;
457 features->vulkanMemoryModel = true;
458 features->vulkanMemoryModelDeviceScope = true;
459 features->vulkanMemoryModelAvailabilityVisibilityChains = true;
460 features->shaderOutputViewportIndex = true;
461 features->shaderOutputLayer = true;
462 features->subgroupBroadcastDynamicId = true;
463
464 /* Vulkan 1.3 */
465 features->robustImageAccess = true;
466 features->inlineUniformBlock = true;
467 features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
468 features->pipelineCreationCacheControl = true;
469 features->privateData = true;
470 features->shaderDemoteToHelperInvocation = true;
471 features->shaderTerminateInvocation = true;
472 features->subgroupSizeControl = true;
473 features->computeFullSubgroups = true;
474 features->synchronization2 = true;
475 features->textureCompressionASTC_HDR = false;
476 features->shaderZeroInitializeWorkgroupMemory = true;
477 features->dynamicRendering = true;
478 features->shaderIntegerDotProduct = true;
479 features->maintenance4 = true;
480
481 /* Vulkan 1.4 */
482 features->pushDescriptor = true;
483
484 /* VK_KHR_acceleration_structure */
485 features->accelerationStructure = true;
486 features->accelerationStructureCaptureReplay = pdevice->has_set_iova;
487 features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
488
489 /* VK_KHR_compute_shader_derivatives */
490 features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
491 features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;
492
493 /* VK_KHR_dynamic_rendering_local_read */
494 features->dynamicRenderingLocalRead = true;
495
496 /* VK_KHR_fragment_shading_rate */
497 features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
498 features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
499 features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
500
501 /* VK_KHR_index_type_uint8 */
502 features->indexTypeUint8 = true;
503
504 /* VK_KHR_line_rasterization */
505 features->rectangularLines = true;
506 features->bresenhamLines = true;
507 features->smoothLines = false;
508 features->stippledRectangularLines = false;
509 features->stippledBresenhamLines = false;
510 features->stippledSmoothLines = false;
511
512 /* VK_KHR_maintenance5 */
513 features->maintenance5 = true;
514
515 /* VK_KHR_maintenance6 */
516 features->maintenance6 = true;
517
518 /* VK_KHR_performance_query */
519 features->performanceCounterQueryPools = true;
520 features->performanceCounterMultipleQueryPools = false;
521
522 /* VK_KHR_pipeline_executable_properties */
523 features->pipelineExecutableInfo = true;
524
525 /* VK_KHR_present_id */
526 features->presentId = pdevice->vk.supported_extensions.KHR_present_id;
527
528 /* VK_KHR_present_wait */
529 features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;
530
531 /* VK_KHR_shader_expect_assume */
532 features->shaderExpectAssume = true;
533
534 /* VK_KHR_shader_float_controls2 */
535 features->shaderFloatControls2 = true;
536
537 /* VK_KHR_shader_subgroup_uniform_control_flow */
538 features->shaderSubgroupUniformControlFlow = true;
539
540 /* VK_KHR_vertex_attribute_divisor */
541 features->vertexAttributeInstanceRateDivisor = true;
542 features->vertexAttributeInstanceRateZeroDivisor = true;
543
544 /* VK_KHR_workgroup_memory_explicit_layout */
545 features->workgroupMemoryExplicitLayout = true;
546 features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
547 features->workgroupMemoryExplicitLayout8BitAccess = true;
548 features->workgroupMemoryExplicitLayout16BitAccess = true;
549
550 /* VK_EXT_4444_formats */
551 features->formatA4R4G4B4 = true;
552 features->formatA4B4G4R4 = true;
553
554 /* VK_EXT_attachment_feedback_loop_dynamic_state */
555 features->attachmentFeedbackLoopDynamicState = true;
556
557 /* VK_EXT_attachment_feedback_loop_layout */
558 features->attachmentFeedbackLoopLayout = true;
559
560 /* VK_EXT_border_color_swizzle */
561 features->borderColorSwizzle = true;
562 features->borderColorSwizzleFromImage = true;
563
564 /* VK_EXT_color_write_enable */
565 features->colorWriteEnable = true;
566
567 /* VK_EXT_conditional_rendering */
568 features->conditionalRendering = true;
569 features->inheritedConditionalRendering = true;
570
571 /* VK_EXT_custom_border_color */
572 features->customBorderColors = true;
573 features->customBorderColorWithoutFormat = true;
574
575 /* VK_EXT_depth_clamp_zero_one */
576 features->depthClampZeroOne = true;
577
578 /* VK_EXT_depth_clip_control */
579 features->depthClipControl = true;
580
581 /* VK_EXT_depth_clip_enable */
582 features->depthClipEnable = true;
583
584 /* VK_EXT_descriptor_buffer */
585 features->descriptorBuffer = true;
586 features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
587 features->descriptorBufferImageLayoutIgnored = true;
588 features->descriptorBufferPushDescriptors = true;
589
590 /* VK_EXT_device_address_binding_report */
591 features->reportAddressBinding = true;
592
593 /* VK_EXT_extended_dynamic_state */
594 features->extendedDynamicState = true;
595
596 /* VK_EXT_extended_dynamic_state2 */
597 features->extendedDynamicState2 = true;
598 features->extendedDynamicState2LogicOp = true;
599 features->extendedDynamicState2PatchControlPoints = true;
600
601 /* VK_EXT_extended_dynamic_state3 */
602 features->extendedDynamicState3PolygonMode = true;
603 features->extendedDynamicState3TessellationDomainOrigin = true;
604 features->extendedDynamicState3DepthClampEnable = true;
605 features->extendedDynamicState3DepthClipEnable = true;
606 features->extendedDynamicState3LogicOpEnable = true;
607 features->extendedDynamicState3SampleMask = true;
608 features->extendedDynamicState3RasterizationSamples = true;
609 features->extendedDynamicState3AlphaToCoverageEnable = true;
610 features->extendedDynamicState3AlphaToOneEnable = true;
611 features->extendedDynamicState3DepthClipNegativeOneToOne = true;
612 features->extendedDynamicState3RasterizationStream = true;
613 features->extendedDynamicState3ConservativeRasterizationMode =
614 pdevice->vk.supported_extensions.EXT_conservative_rasterization;
615 features->extendedDynamicState3ExtraPrimitiveOverestimationSize =
616 pdevice->vk.supported_extensions.EXT_conservative_rasterization;
617 features->extendedDynamicState3LineRasterizationMode = true;
618 features->extendedDynamicState3LineStippleEnable = false;
619 features->extendedDynamicState3ProvokingVertexMode = true;
620 features->extendedDynamicState3SampleLocationsEnable =
621 pdevice->info->a6xx.has_sample_locations;
622 features->extendedDynamicState3ColorBlendEnable = true;
623 features->extendedDynamicState3ColorBlendEquation = true;
624 features->extendedDynamicState3ColorWriteMask = true;
625 features->extendedDynamicState3ViewportWScalingEnable = false;
626 features->extendedDynamicState3ViewportSwizzle = false;
627 features->extendedDynamicState3ShadingRateImageEnable = false;
628 features->extendedDynamicState3CoverageToColorEnable = false;
629 features->extendedDynamicState3CoverageToColorLocation = false;
630 features->extendedDynamicState3CoverageModulationMode = false;
631 features->extendedDynamicState3CoverageModulationTableEnable = false;
632 features->extendedDynamicState3CoverageModulationTable = false;
633 features->extendedDynamicState3CoverageReductionMode = false;
634 features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
635 features->extendedDynamicState3ColorBlendAdvanced = false;
636
637 /* VK_EXT_fragment_density_map */
638 features->fragmentDensityMap = true;
639 features->fragmentDensityMapDynamic = false;
640 features->fragmentDensityMapNonSubsampledImages = true;
641
642 /* VK_EXT_global_priority_query */
643 features->globalPriorityQuery = true;
644
645 /* VK_EXT_graphics_pipeline_library */
646 features->graphicsPipelineLibrary = true;
647
648 /* VK_EXT_host_image_copy */
649 features->hostImageCopy = true;
650
651 /* VK_EXT_image_2d_view_of_3d */
652 features->image2DViewOf3D = true;
653 features->sampler2DViewOf3D = true;
654
655 /* VK_EXT_image_view_min_lod */
656 features->minLod = true;
657
658 /* VK_EXT_legacy_vertex_attributes */
659 features->legacyVertexAttributes = true;
660
661 /* VK_EXT_legacy_dithering */
662 features->legacyDithering = true;
663
664 /* VK_EXT_map_memory_placed */
665 features->memoryMapPlaced = true;
666 features->memoryMapRangePlaced = false;
667 features->memoryUnmapReserve = true;
668
669 /* VK_EXT_multi_draw */
670 features->multiDraw = true;
671
672 /* VK_EXT_mutable_descriptor_type */
673 features->mutableDescriptorType = true;
674
675 /* VK_EXT_nested_command_buffer */
676 features->nestedCommandBuffer = true;
677 features->nestedCommandBufferRendering = true;
678 features->nestedCommandBufferSimultaneousUse = true;
679
680 /* VK_EXT_non_seamless_cube_map */
681 features->nonSeamlessCubeMap = true;
682
683 /* VK_EXT_pipeline_robustness */
684 features->pipelineRobustness = true;
685
686 /* VK_EXT_primitive_topology_list_restart */
687 features->primitiveTopologyListRestart = true;
688 features->primitiveTopologyPatchListRestart = false;
689
690 /* VK_EXT_primitives_generated_query */
691 features->primitivesGeneratedQuery = true;
692 features->primitivesGeneratedQueryWithRasterizerDiscard = false;
693 features->primitivesGeneratedQueryWithNonZeroStreams = false;
694
695 /* VK_EXT_provoking_vertex */
696 features->provokingVertexLast = true;
697
698 /* VK_EXT_rasterization_order_attachment_access */
699 features->rasterizationOrderColorAttachmentAccess = true;
700 features->rasterizationOrderDepthAttachmentAccess = true;
701 features->rasterizationOrderStencilAttachmentAccess = true;
702
703 /* VK_KHR_ray_query */
704 features->rayQuery = true;
705
706 /* VK_KHR_ray_tracing_maintenance1 */
707 features->rayTracingMaintenance1 = true;
708
709 /* VK_EXT_robustness2 */
710 features->robustBufferAccess2 = true;
711 features->robustImageAccess2 = true;
712 features->nullDescriptor = true;
713
714 /* VK_EXT_shader_module_identifier */
715 features->shaderModuleIdentifier = true;
716
717 /* VK_EXT_shader_replicated_composites */
718 features->shaderReplicatedComposites = true;
719
720 #ifdef TU_USE_WSI_PLATFORM
721 /* VK_EXT_swapchain_maintenance1 */
722 features->swapchainMaintenance1 = true;
723 #endif
724
725 /* VK_EXT_texel_buffer_alignment */
726 features->texelBufferAlignment = true;
727
728 /* VK_EXT_transform_feedback */
729 features->transformFeedback = true;
730 features->geometryStreams = true;
731
732 /* VK_EXT_vertex_input_dynamic_state */
733 features->vertexInputDynamicState = true;
734
735 /* VK_KHR_shader_relaxed_extended_instruction */
736 features->shaderRelaxedExtendedInstruction = true;
737
738 /* VK_KHR_subgroup_rotate */
739 features->shaderSubgroupRotate = true;
740 features->shaderSubgroupRotateClustered = true;
741 }
742
743 static void
744 tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
745 struct vk_properties *p)
746 {
747 memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
748 memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
749 memset(p->deviceLUID, 0, VK_LUID_SIZE);
750 p->deviceNodeMask = 0;
751 p->deviceLUIDValid = false;
752
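/* The subgroup size is the base wave width, doubled when the HW can run
 * "double threadsize" (128-wide) waves.
 */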
753 p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
754 pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
755 p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
756 p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
757 VK_SUBGROUP_FEATURE_VOTE_BIT |
758 VK_SUBGROUP_FEATURE_BALLOT_BIT |
759 VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
760 VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
761 VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
762 VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
763 VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
764 VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
765 if (pdevice->info->a6xx.has_getfiberid) {
766 p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
767 p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
768 }
769
770 p->subgroupQuadOperationsInAllStages = false;
771
772 p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
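/* Without HW multiview only a single view is advertised, unless conformance
 * checks are deliberately bypassed via TU_DEBUG(NOCONFORM).
 */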
773 p->maxMultiviewViewCount =
774 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
775 p->maxMultiviewInstanceIndex = INT_MAX;
776 p->protectedNoFault = false;
777 /* Our largest descriptors are 2 texture descriptors, or a texture and
778 * sampler descriptor.
779 */
780 p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
781 /* Our buffer size fields allow only this much */
782 p->maxMemoryAllocationSize = 0xFFFFFFFFull;
783
784 }
785
786
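/* A descriptor occupies at least A6XX_TEX_CONST_DWORDS * 4 bytes of a set's
 * backing storage, so this is a conservative per-set/per-stage descriptor
 * count derived from MAX_SET_SIZE.
 */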
787 static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
788 static const VkSampleCountFlags sample_counts =
789 VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
790
791 static void
792 tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
793 struct vk_properties *p)
794 {
795 p->driverID = VK_DRIVER_ID_MESA_TURNIP;
796 memset(p->driverName, 0, sizeof(p->driverName));
797 snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
798 "turnip Mesa driver");
799 memset(p->driverInfo, 0, sizeof(p->driverInfo));
800 snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
801 "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
802 if (pdevice->info->chip >= 7) {
803 p->conformanceVersion = (VkConformanceVersion) {
804 .major = 1,
805 .minor = 4,
806 .subminor = 0,
807 .patch = 0,
808 };
809 } else {
810 p->conformanceVersion = (VkConformanceVersion) {
811 .major = 1,
812 .minor = 2,
813 .subminor = 7,
814 .patch = 1,
815 };
816 }
817
818 p->denormBehaviorIndependence =
819 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
820 p->roundingModeIndependence =
821 VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
822
823 p->shaderDenormFlushToZeroFloat16 = true;
824 p->shaderDenormPreserveFloat16 = false;
825 p->shaderRoundingModeRTEFloat16 = true;
826 p->shaderRoundingModeRTZFloat16 = false;
827 p->shaderSignedZeroInfNanPreserveFloat16 = true;
828
829 p->shaderDenormFlushToZeroFloat32 = true;
830 p->shaderDenormPreserveFloat32 = false;
831 p->shaderRoundingModeRTEFloat32 = true;
832 p->shaderRoundingModeRTZFloat32 = false;
833 p->shaderSignedZeroInfNanPreserveFloat32 = true;
834
835 p->shaderDenormFlushToZeroFloat64 = false;
836 p->shaderDenormPreserveFloat64 = false;
837 p->shaderRoundingModeRTEFloat64 = false;
838 p->shaderRoundingModeRTZFloat64 = false;
839 p->shaderSignedZeroInfNanPreserveFloat64 = false;
840
841 p->shaderUniformBufferArrayNonUniformIndexingNative = true;
842 p->shaderSampledImageArrayNonUniformIndexingNative = true;
843 p->shaderStorageBufferArrayNonUniformIndexingNative = true;
844 p->shaderStorageImageArrayNonUniformIndexingNative = true;
845 p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
846 p->robustBufferAccessUpdateAfterBind = false;
847 p->quadDivergentImplicitLod = false;
848
849 p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
850 p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
851 p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
852 p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
853 p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
854 p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
855 p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
856 p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
857 p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
858 p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
859 p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
860 p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
861 p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
862 p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
863 p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
864 p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;
865
866 p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
867 p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
868 p->independentResolveNone = false;
869 p->independentResolve = false;
870
871 p->filterMinmaxSingleComponentFormats = true;
872 p->filterMinmaxImageComponentMapping = true;
873
874 p->maxTimelineSemaphoreValueDifference = UINT64_MAX;
875
876 p->framebufferIntegerColorSampleCounts = sample_counts;
877 }
878
879 static void
880 tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
881 struct vk_properties *p)
882 {
883 p->minSubgroupSize = pdevice->info->threadsize_base;
884 p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
885 pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
886 p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
887 p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;
888
889 p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
890 p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
891 p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
892 p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
893 p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
894 p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;
895
896 p->integerDotProduct8BitUnsignedAccelerated = false;
897 p->integerDotProduct8BitSignedAccelerated = false;
898 p->integerDotProduct8BitMixedSignednessAccelerated = false;
899 p->integerDotProduct4x8BitPackedUnsignedAccelerated =
900 pdevice->info->a6xx.has_dp2acc;
901 /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
902 p->integerDotProduct4x8BitPackedSignedAccelerated = false;
903 p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
904 pdevice->info->a6xx.has_dp2acc;
905 p->integerDotProduct16BitUnsignedAccelerated = false;
906 p->integerDotProduct16BitSignedAccelerated = false;
907 p->integerDotProduct16BitMixedSignednessAccelerated = false;
908 p->integerDotProduct32BitUnsignedAccelerated = false;
909 p->integerDotProduct32BitSignedAccelerated = false;
910 p->integerDotProduct32BitMixedSignednessAccelerated = false;
911 p->integerDotProduct64BitUnsignedAccelerated = false;
912 p->integerDotProduct64BitSignedAccelerated = false;
913 p->integerDotProduct64BitMixedSignednessAccelerated = false;
914 p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
915 p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
916 p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
917 p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
918 pdevice->info->a6xx.has_dp2acc;
919 /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
920 p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
921 p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
922 pdevice->info->a6xx.has_dp2acc;
923 p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
924 p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
925 p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
926 p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
927 p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
928 p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
929 p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
930 p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
931 p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;
932
933 p->storageTexelBufferOffsetAlignmentBytes = 64;
934 p->storageTexelBufferOffsetSingleTexelAlignment = true;
935 p->uniformTexelBufferOffsetAlignmentBytes = 64;
936 p->uniformTexelBufferOffsetSingleTexelAlignment = true;
937
938 /* The address space is 4GB for current kernels, so there's no point
939 * allowing a larger buffer. Our buffer sizes are 64-bit though, so
940 * GetBufferDeviceRequirements won't fall over if someone actually creates
941 * a 4GB buffer.
942 */
943 p->maxBufferSize = 1ull << 32;
944 }
945
946 /* CP_ALWAYS_ON_COUNTER runs at a fixed 19.2 MHz */
947 #define ALWAYS_ON_FREQUENCY 19200000
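/* One tick of the always-on counter is 1e9 / 19200000 ≈ 52.08 ns; this is
 * what timestampPeriod reports below.
 */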
948
949 static void
950 tu_get_properties(struct tu_physical_device *pdevice,
951 struct vk_properties *props)
952 {
953 /* Limits */
954 props->maxImageDimension1D = (1 << 14);
955 props->maxImageDimension2D = (1 << 14);
956 props->maxImageDimension3D = (1 << 11);
957 props->maxImageDimensionCube = (1 << 14);
958 props->maxImageArrayLayers = (1 << 11);
959 props->maxTexelBufferElements = MAX_TEXEL_ELEMENTS;
960 props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
961 props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
962 props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
963 props->maxMemoryAllocationCount = UINT32_MAX;
964 props->maxSamplerAllocationCount = 64 * 1024;
965 props->bufferImageGranularity = 64; /* A cache line */
966 props->sparseAddressSpaceSize = 0;
967 props->maxBoundDescriptorSets = pdevice->usable_sets;
968 props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
969 props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
970 props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
971 props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
972 props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
973 props->maxPerStageDescriptorInputAttachments = MAX_RTS;
974 props->maxPerStageResources = max_descriptor_set_size;
975 props->maxDescriptorSetSamplers = max_descriptor_set_size;
976 props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
977 props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
978 props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
979 props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
980 props->maxDescriptorSetSampledImages = max_descriptor_set_size;
981 props->maxDescriptorSetStorageImages = max_descriptor_set_size;
982 props->maxDescriptorSetInputAttachments = MAX_RTS;
983 props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
984 props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
985 props->maxVertexInputAttributeOffset = 4095;
986 props->maxVertexInputBindingStride = 2048;
987 props->maxVertexOutputComponents = 128;
988 props->maxTessellationGenerationLevel = 64;
989 props->maxTessellationPatchSize = 32;
990 props->maxTessellationControlPerVertexInputComponents = 128;
991 props->maxTessellationControlPerVertexOutputComponents = 128;
992 props->maxTessellationControlPerPatchOutputComponents = 120;
993 props->maxTessellationControlTotalOutputComponents = 4096;
994 props->maxTessellationEvaluationInputComponents = 128;
995 props->maxTessellationEvaluationOutputComponents = 128;
996 props->maxGeometryShaderInvocations = 32;
997 props->maxGeometryInputComponents = 64;
998 props->maxGeometryOutputComponents = 128;
999 props->maxGeometryOutputVertices = 256;
1000 props->maxGeometryTotalOutputComponents = 1024;
1001 props->maxFragmentInputComponents = 124;
1002 props->maxFragmentOutputAttachments = 8;
1003 props->maxFragmentDualSrcAttachments = 1;
1004 props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
1005 props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
1006 props->maxComputeWorkGroupCount[0] =
1007 props->maxComputeWorkGroupCount[1] =
1008 props->maxComputeWorkGroupCount[2] = 65535;
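/* Invocations per workgroup are bounded by the wave width (doubled in
 * 128-wide wave mode) times max_waves, the number of wave slots available.
 */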
1009 props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
1010 pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
1011 pdevice->info->threadsize_base * pdevice->info->max_waves;
1012 props->maxComputeWorkGroupSize[0] =
1013 props->maxComputeWorkGroupSize[1] =
1014 props->maxComputeWorkGroupSize[2] = 1024;
1015 props->subPixelPrecisionBits = 8;
1016 props->subTexelPrecisionBits = 8;
1017 props->mipmapPrecisionBits = 8;
1018 props->maxDrawIndexedIndexValue = UINT32_MAX;
1019 props->maxDrawIndirectCount = UINT32_MAX;
1020 props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
1021 props->maxSamplerAnisotropy = 16;
1022 props->maxViewports =
1023 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
1024 props->maxViewportDimensions[0] =
1025 props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
1026 props->viewportBoundsRange[0] = INT16_MIN;
1027 props->viewportBoundsRange[1] = INT16_MAX;
1028 props->viewportSubPixelBits = 8;
1029 props->minMemoryMapAlignment = 4096; /* A page */
1030 props->minTexelBufferOffsetAlignment = 64;
1031 props->minUniformBufferOffsetAlignment = 64;
1032 props->minStorageBufferOffsetAlignment = 4;
1033 props->minTexelOffset = -16;
1034 props->maxTexelOffset = 15;
1035 props->minTexelGatherOffset = -32;
1036 props->maxTexelGatherOffset = 31;
1037 props->minInterpolationOffset = -0.5;
1038 props->maxInterpolationOffset = 0.4375;
1039 props->subPixelInterpolationOffsetBits = 4;
1040 props->maxFramebufferWidth = (1 << 14);
1041 props->maxFramebufferHeight = (1 << 14);
1042 props->maxFramebufferLayers = (1 << 10);
1043 props->framebufferColorSampleCounts = sample_counts;
1044 props->framebufferDepthSampleCounts = sample_counts;
1045 props->framebufferStencilSampleCounts = sample_counts;
1046 props->framebufferNoAttachmentsSampleCounts = sample_counts;
1047 props->maxColorAttachments = MAX_RTS;
1048 props->sampledImageColorSampleCounts = sample_counts;
1049 props->sampledImageIntegerSampleCounts = sample_counts;
1050 props->sampledImageDepthSampleCounts = sample_counts;
1051 props->sampledImageStencilSampleCounts = sample_counts;
1052 props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1053 props->maxSampleMaskWords = 1;
1054 props->timestampComputeAndGraphics = true;
1055 props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY;
1056 props->maxClipDistances = 8;
1057 props->maxCullDistances = 8;
1058 props->maxCombinedClipAndCullDistances = 8;
1059 props->discreteQueuePriorities = 2;
1060 props->pointSizeRange[0] = 1;
1061 props->pointSizeRange[1] = 4092;
1062 props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
1063 props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
1064 props->pointSizeGranularity = 0.0625;
1065 props->lineWidthGranularity =
1066 pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
1067 props->strictLines = true;
1068 props->standardSampleLocations = true;
1069 props->optimalBufferCopyOffsetAlignment = 128;
1070 props->optimalBufferCopyRowPitchAlignment = 128;
1071 props->nonCoherentAtomSize = 64;
1072
1073 props->apiVersion =
1074 (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
1075 ((pdevice->info->chip >= 7) ? TU_API_VERSION :
1076 VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION))
1077 : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
1078 props->driverVersion = vk_get_driver_version();
1079 props->vendorID = 0x5143;
1080 props->deviceID = pdevice->dev_id.chip_id;
1081 props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1082
1083 /* Vulkan 1.4 */
1084 props->dynamicRenderingLocalReadDepthStencilAttachments = true;
1085 props->dynamicRenderingLocalReadMultisampledAttachments = true;
1086
1087 /* sparse properties */
1088 props->sparseResidencyStandard2DBlockShape = { 0 };
1089 props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
1090 props->sparseResidencyStandard3DBlockShape = { 0 };
1091 props->sparseResidencyAlignedMipSize = { 0 };
1092 props->sparseResidencyNonResidentStrict = { 0 };
1093
1094 strcpy(props->deviceName, pdevice->name);
1095 memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1096
1097 tu_get_physical_device_properties_1_1(pdevice, props);
1098 tu_get_physical_device_properties_1_2(pdevice, props);
1099 tu_get_physical_device_properties_1_3(pdevice, props);
1100
1101 /* VK_KHR_compute_shader_derivatives */
1102 props->meshAndTaskShaderDerivatives = false;
1103
1104 /* VK_KHR_fragment_shading_rate */
1105 if (pdevice->info->a6xx.has_attachment_shading_rate) {
1106 props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
1107 props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
1108 } else {
1109 props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
1110 props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
1111 }
1112 props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1113 props->primitiveFragmentShadingRateWithMultipleViewports =
1114 pdevice->info->a7xx.has_primitive_shading_rate;
1115 /* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
1116 * for some reason.
1117 */
1118 props->layeredShadingRateAttachments = false;
1119 props->fragmentShadingRateNonTrivialCombinerOps = true;
1120 props->maxFragmentSize = {4, 4};
1121 props->maxFragmentSizeAspectRatio = 4;
1122 props->maxFragmentShadingRateCoverageSamples = 16;
1123 props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
1124 props->fragmentShadingRateWithShaderDepthStencilWrites = true;
1125 props->fragmentShadingRateWithSampleMask = true;
1126 /* gl_SampleMaskIn[0] has wrong values when VK_EXT_post_depth_coverage is used. */
1127 props->fragmentShadingRateWithShaderSampleMask = false;
1128 props->fragmentShadingRateWithConservativeRasterization = true;
1129 props->fragmentShadingRateWithFragmentShaderInterlock = false;
1130 props->fragmentShadingRateWithCustomSampleLocations = true;
1131 props->fragmentShadingRateStrictMultiplyCombiner = true;
1132
1133 /* VK_KHR_push_descriptor */
1134 props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
1135
1136 /* VK_EXT_transform_feedback */
1137 props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
1138 props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
1139 props->maxTransformFeedbackBufferSize = UINT32_MAX;
1140 props->maxTransformFeedbackStreamDataSize = 512;
1141 props->maxTransformFeedbackBufferDataSize = 512;
1142 props->maxTransformFeedbackBufferDataStride = 512;
1143 props->transformFeedbackQueries = true;
1144 props->transformFeedbackStreamsLinesTriangles = true;
1145 props->transformFeedbackRasterizationStreamSelect = true;
1146 props->transformFeedbackDraw = true;
1147
1148 /* VK_EXT_sample_locations */
1149 props->sampleLocationSampleCounts =
1150 pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
1151 props->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
1152 props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
1153 props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
1154 props->sampleLocationSubPixelBits = 4;
1155 props->variableSampleLocations = true;
1156
1157 /* VK_KHR_vertex_attribute_divisor */
1158 props->maxVertexAttribDivisor = UINT32_MAX;
1159 props->supportsNonZeroFirstInstance = true;
1160
1161 /* VK_EXT_custom_border_color */
1162 props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;
1163
1164 /* VK_KHR_performance_query */
1165 props->allowCommandBufferQueryCopies = false;
1166
1167 /* VK_EXT_robustness2 */
1168 /* see write_buffer_descriptor() */
1169 props->robustStorageBufferAccessSizeAlignment = 4;
1170 /* see write_ubo_descriptor() */
1171 props->robustUniformBufferAccessSizeAlignment = 16;
1172
1173 /* VK_EXT_pipeline_robustness */
1174 props->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1175 props->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
1176 props->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
1177 props->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;
1178
1179 /* VK_EXT_provoking_vertex */
1180 props->provokingVertexModePerPipeline = true;
1181 props->transformFeedbackPreservesTriangleFanProvokingVertex = false;
1182
1183 /* VK_KHR_line_rasterization */
1184 props->lineSubPixelPrecisionBits = 8;
1185
1186 /* VK_EXT_physical_device_drm */
1187 props->drmHasPrimary = pdevice->has_master;
1188 props->drmPrimaryMajor = pdevice->master_major;
1189 props->drmPrimaryMinor = pdevice->master_minor;
1190
1191 props->drmHasRender = pdevice->has_local;
1192 props->drmRenderMajor = pdevice->local_major;
1193 props->drmRenderMinor = pdevice->local_minor;
1194
1195 /* VK_EXT_shader_module_identifier */
1196 STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
1197 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1198 memcpy(props->shaderModuleIdentifierAlgorithmUUID,
1199 vk_shaderModuleIdentifierAlgorithmUUID,
1200 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
1201
1202 /* VK_EXT_map_memory_placed */
1203 os_get_page_size(&os_page_size);
1204 props->minPlacedMemoryMapAlignment = os_page_size;
1205
1206 /* VK_EXT_multi_draw */
1207 props->maxMultiDrawCount = 2048;
1208
1209 /* VK_EXT_nested_command_buffer */
1210 props->maxCommandBufferNestingLevel = UINT32_MAX;
1211
1212 /* VK_EXT_graphics_pipeline_library */
1213 props->graphicsPipelineLibraryFastLinking = true;
1214 props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;
1215
1216 /* VK_EXT_extended_dynamic_state3 */
1217 props->dynamicPrimitiveTopologyUnrestricted = true;
1218
1219 /* VK_EXT_descriptor_buffer */
1220 props->combinedImageSamplerDescriptorSingleArray = true;
1221 props->bufferlessPushDescriptors = true;
1222 props->allowSamplerImageViewPostSubmitCreation = true;
1223 props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
1224 props->maxDescriptorBufferBindings = pdevice->usable_sets;
1225 props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
1226 props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
1227 props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
1228 props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
1229 props->bufferCaptureReplayDescriptorDataSize = 0;
1230 props->imageCaptureReplayDescriptorDataSize = 0;
1231 props->imageViewCaptureReplayDescriptorDataSize = 0;
1232 props->samplerCaptureReplayDescriptorDataSize = 0;
1233 props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
1234 /* Note: these sizes must match descriptor_size() */
1235 props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1236 props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
1237 props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1238 props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1239 props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1240 props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1241 props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1242 props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1243 props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1244 props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
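/* A storage buffer may need extra views alongside the base 32-bit one: a
 * 16-bit view when 16-bit storage is supported without isam.v, and an 8-bit
 * view when 8-bit storage is supported, hence the COND() terms below.
 */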
1245 props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
1246 COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
1247 COND(pdevice->info->a7xx.storage_8bit, 1));
1248 props->robustStorageBufferDescriptorSize =
1249 props->storageBufferDescriptorSize;
1250 props->accelerationStructureDescriptorSize = 4 * A6XX_TEX_CONST_DWORDS;
1251 props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1252 props->maxSamplerDescriptorBufferRange = ~0ull;
1253 props->maxResourceDescriptorBufferRange = ~0ull;
1254 props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
1255 props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
1256 props->descriptorBufferAddressSpaceSize = ~0ull;
1257 props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
1258
1259 /* VK_EXT_legacy_vertex_attributes */
1260 props->nativeUnalignedPerformance = true;
1261
1262 /* VK_EXT_fragment_density_map*/
1263 props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
1264 props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
1265 props->fragmentDensityInvocations = false;
1266
1267 /* VK_KHR_maintenance5 */
1268 props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
1269 props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
1270 props->depthStencilSwizzleOneSupport = true;
1271 props->polygonModePointSize = true;
1272 props->nonStrictWideLinesUseParallelogram = false;
1273 props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1274
1275 /* VK_KHR_maintenance6 */
1276 props->blockTexelViewCompatibleMultipleLayers = true;
1277 props->maxCombinedImageSamplerDescriptorCount = 1;
1278 props->fragmentShadingRateClampCombinerInputs = true;
1279
1280 /* VK_EXT_host_image_copy */
1281
1282 /* We don't use the layouts ATM so just report all layouts from
1283 * extensions that we support as compatible.
1284 */
1285 static const VkImageLayout supported_layouts[] = {
1286 VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
1287 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1288 VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1289 VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1290 VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1291 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1292 VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1293 VK_IMAGE_LAYOUT_PREINITIALIZED,
1294 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1295 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1296 VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1297 VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1298 VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1299 VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1300 VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1301 VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1302 VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1303 VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1304 };
1305
1306 props->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1307 props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1308 props->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1309 props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1310
1311 /* We're a UMA device, so we can always map every kind of memory */
1312 props->identicalMemoryTypeRequirements = true;
1313
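/* Derive optimalTilingLayoutUUID (VK_EXT_host_image_copy) from the UBWC
 * configuration below, since that is what determines whether the tiled
 * layouts produced by two devices are compatible for host image copies.
 */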
1314 {
1315 struct mesa_sha1 sha1_ctx;
1316 uint8_t sha1[20];
1317
1318 _mesa_sha1_init(&sha1_ctx);
1319
1320 /* Make sure we don't match with other vendors */
1321 const char *driver = "turnip-v1";
1322 _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
1323
1324 /* Hash in UBWC configuration */
1325 _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.highest_bank_bit,
1326 sizeof(pdevice->ubwc_config.highest_bank_bit));
1327 _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.bank_swizzle_levels,
1328 sizeof(pdevice->ubwc_config.bank_swizzle_levels));
1329 _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.macrotile_mode,
1330 sizeof(pdevice->ubwc_config.macrotile_mode));
1331
1332 _mesa_sha1_final(&sha1_ctx, sha1);
1333
1334 memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
1335 }
1336
1337 /* VK_KHR_acceleration_structure */
1338 props->maxGeometryCount = (1 << 24) - 1;
1339 props->maxInstanceCount = (1 << 24) - 1;
1340 props->maxPrimitiveCount = (1 << 29) - 1;
1341 props->maxPerStageDescriptorAccelerationStructures = max_descriptor_set_size;
1342 props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
1343 props->maxDescriptorSetAccelerationStructures = max_descriptor_set_size;
1344 props->maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
1345 props->minAccelerationStructureScratchOffsetAlignment = 128;
1346
1347 /* VK_EXT_conservative_rasterization */
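/* The value below reads as half a pixel of expansion from the conservative
 * rasterizer plus one 1/256 subpixel of precision slop (our interpretation).
 */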
1348 props->primitiveOverestimationSize = 0.5 + 1 / 256.;
1349 props->maxExtraPrimitiveOverestimationSize = 0.5;
1350 props->extraPrimitiveOverestimationSizeGranularity = 0.5;
1351 props->primitiveUnderestimation = false;
1352 props->conservativePointAndLineRasterization = false;
1353 props->degenerateTrianglesRasterized = true;
1354 props->degenerateLinesRasterized = false;
1355 props->fullyCoveredFragmentShaderInputVariable = false;
1356 props->conservativeRasterizationPostDepthCoverage = false;
1357 }
1358
1359 static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
1360 &tu_shader_ops,
1361 &tu_nir_shaders_ops,
1362 NULL,
1363 };
1364
1365 VkResult
1366 tu_physical_device_init(struct tu_physical_device *device,
1367 struct tu_instance *instance)
1368 {
1369 VkResult result = VK_SUCCESS;
1370
1371 const char *fd_name = fd_dev_name(&device->dev_id);
1372 if (!fd_name) {
1373 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1374 "device (chip_id = %" PRIX64
1375 ", gpu_id = %u) is unsupported",
1376 device->dev_id.chip_id, device->dev_id.gpu_id);
1377 }
1378
1379 const struct fd_dev_info info = fd_dev_info(&device->dev_id);
1380 assert(info.chip);
1381
1382 /* Print a suffix if raytracing is disabled by the SW fuse, in an attempt
1383 * to avoid confusion when apps don't work.
1384 */
1385 bool raytracing_disabled = info.a7xx.has_sw_fuse &&
1386 !device->has_raytracing;
1387 const char *rt_suffix = raytracing_disabled ? " (raytracing disabled)" : "";
1388
1389 if (strncmp(fd_name, "FD", 2) == 0) {
1390 device->name = vk_asprintf(&instance->vk.alloc,
1391 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
1392 "Turnip Adreno (TM) %s%s", &fd_name[2],
1393 rt_suffix);
1394 } else {
1395 device->name = vk_asprintf(&instance->vk.alloc,
1396 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
1397 "%s%s", fd_name, rt_suffix);
1398
1399 }
1400 if (!device->name) {
1401 return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1402 "device name alloc fail");
1403 }
1404
1405 switch (fd_dev_gen(&device->dev_id)) {
1406 case 6:
1407 case 7: {
1408 device->dev_info = info;
1409 device->info = &device->dev_info;
1410 uint32_t depth_cache_size =
1411 device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
1412 uint32_t color_cache_size =
1413 (device->info->num_ccu *
1414 device->info->a6xx.sysmem_per_ccu_color_cache_size);
1415 uint32_t color_cache_size_gmem =
1416 color_cache_size /
1417 (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
1418
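/* In sysmem (bypass) rendering the CCU caches are carved out of the start of
 * GMEM: the depth cache first, followed by the color cache.
 */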
1419 device->ccu_depth_offset_bypass = 0;
1420 device->ccu_offset_bypass =
1421 device->ccu_depth_offset_bypass + depth_cache_size;
1422
1423 if (device->info->a7xx.has_gmem_vpc_attr_buf) {
1424 device->vpc_attr_buf_size_bypass =
1425 device->info->a7xx.sysmem_vpc_attr_buf_size;
1426 device->vpc_attr_buf_offset_bypass =
1427 device->ccu_offset_bypass + color_cache_size;
1428
1429 device->vpc_attr_buf_size_gmem =
1430 device->info->a7xx.gmem_vpc_attr_buf_size;
1431 device->vpc_attr_buf_offset_gmem =
1432 device->gmem_size -
1433 (device->vpc_attr_buf_size_gmem * device->info->num_ccu);
1434
1435 device->ccu_offset_gmem =
1436 device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;
1437
1438 device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
1439 } else {
1440 device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
1441 device->usable_gmem_size_gmem = device->gmem_size;
1442 }
1443
1444 if (instance->reserve_descriptor_set) {
1445 device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
1446 } else {
1447 device->usable_sets = device->info->a6xx.max_sets;
1448 device->reserved_set_idx = -1;
1449 }
1450 break;
1451 }
1452 default:
1453 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1454 "device %s is unsupported", device->name);
1455 goto fail_free_name;
1456 }
1457 if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
1458 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1459 "cannot generate UUID");
1460 goto fail_free_name;
1461 }
1462
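/* Cached, non-coherent memory requires manual CPU cache maintenance, which
 * needs a known L1 dcache line size; it is also avoided on 32-bit ARM, where
 * userspace cache management isn't usable (our understanding).
 */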
1463 device->level1_dcache_size = tu_get_l1_dcache_size();
1464 device->has_cached_non_coherent_memory =
1465 device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
1466
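/* The base memory type is device-local, host-visible and coherent (uncached
 * from the CPU's point of view); cached variants are added below when the
 * platform supports them.
 */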
1467 device->memory.type_count = 1;
1468 device->memory.types[0] =
1469 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1470 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1471 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1472
1473 if (device->has_cached_coherent_memory) {
1474 device->memory.types[device->memory.type_count] =
1475 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1476 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1477 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1478 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1479 device->memory.type_count++;
1480 }
1481
1482 if (device->has_cached_non_coherent_memory) {
1483 device->memory.types[device->memory.type_count] =
1484 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1485 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1486 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1487 device->memory.type_count++;
1488 }
1489
1490 /* Provide fallback UBWC config values if the kernel doesn't support
1491 * providing them. This should match what the kernel programs.
1492 */
1493 if (!device->ubwc_config.highest_bank_bit) {
1494 device->ubwc_config.highest_bank_bit = info.highest_bank_bit;
1495 }
1496 if (device->ubwc_config.bank_swizzle_levels == ~0) {
1497 device->ubwc_config.bank_swizzle_levels = info.ubwc_swizzle;
1498 }
1499 if (device->ubwc_config.macrotile_mode == FDL_MACROTILE_INVALID) {
1500 device->ubwc_config.macrotile_mode =
1501 (enum fdl_macrotile_mode) info.macrotile_mode;
1502 }
1503
1504 fd_get_driver_uuid(device->driver_uuid);
1505 fd_get_device_uuid(device->device_uuid, &device->dev_id);
1506
1507 struct vk_physical_device_dispatch_table dispatch_table;
1508 vk_physical_device_dispatch_table_from_entrypoints(
1509 &dispatch_table, &tu_physical_device_entrypoints, true);
1510 vk_physical_device_dispatch_table_from_entrypoints(
1511 &dispatch_table, &wsi_physical_device_entrypoints, false);
1512
1513 result = vk_physical_device_init(&device->vk, &instance->vk,
1514 NULL, NULL, NULL, /* We set up extensions later */
1515 &dispatch_table);
1516 if (result != VK_SUCCESS)
1517 goto fail_free_name;
1518
1519 get_device_extensions(device, &device->vk.supported_extensions);
1520 tu_get_features(device, &device->vk.supported_features);
1521 tu_get_properties(device, &device->vk.properties);
1522
1523 device->vk.supported_sync_types = device->sync_types;
1524
1525 #ifdef TU_USE_WSI_PLATFORM
1526 result = tu_wsi_init(device);
1527 if (result != VK_SUCCESS) {
1528 vk_startup_errorf(instance, result, "WSI init failure");
1529 vk_physical_device_finish(&device->vk);
1530 goto fail_free_name;
1531 }
1532 #endif
1533
1534 /* The gpu id is already embedded in the uuid that we pass below, so the
1535 * device name is enough to identify the cache.
1536 */
1537 char buf[VK_UUID_SIZE * 2 + 1];
1538 mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1539 device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1540
1541 device->vk.pipeline_cache_import_ops = cache_import_ops;
1542
1543 return VK_SUCCESS;
1544
1545 fail_free_name:
1546 vk_free(&instance->vk.alloc, (void *)device->name);
1547 return result;
1548 }
1549
1550 static void
1551 tu_physical_device_finish(struct tu_physical_device *device)
1552 {
1553 #ifdef TU_USE_WSI_PLATFORM
1554 tu_wsi_finish(device);
1555 #endif
1556
1557 close(device->local_fd);
1558 if (device->master_fd != -1)
1559 close(device->master_fd);
1560
1561 if (device->kgsl_dma_fd != -1)
1562 close(device->kgsl_dma_fd);
1563
1564 disk_cache_destroy(device->vk.disk_cache);
1565 vk_free(&device->instance->vk.alloc, (void *)device->name);
1566
1567 vk_physical_device_finish(&device->vk);
1568 }
1569
1570 static void
1571 tu_destroy_physical_device(struct vk_physical_device *device)
1572 {
1573 tu_physical_device_finish((struct tu_physical_device *) device);
1574 vk_free(&device->instance->alloc, device);
1575 }
1576
1577 static const driOptionDescription tu_dri_options[] = {
1578 DRI_CONF_SECTION_PERFORMANCE
1579 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1580 DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1581 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1582 DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1583 DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1584 DRI_CONF_SECTION_END
1585
1586 DRI_CONF_SECTION_DEBUG
1587 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1588 DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1589 DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1590 DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1591 DRI_CONF_SECTION_END
1592
1593 DRI_CONF_SECTION_MISCELLANEOUS
1594 DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1595 DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1596 DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1597 DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
1598 DRI_CONF_SECTION_END
1599 };
1600
1601 static void
1602 tu_init_dri_options(struct tu_instance *instance)
1603 {
1604 driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1605 ARRAY_SIZE(tu_dri_options));
1606 driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1607 instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1608 instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1609
1610 instance->dont_care_as_load =
1611 driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1612 instance->conservative_lrz =
1613 !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1614 instance->reserve_descriptor_set =
1615 !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1616 instance->allow_oob_indirect_ubo_loads =
1617 driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1618 instance->disable_d24s8_border_color_workaround =
1619 driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
1620 }
1621
1622 static uint32_t instance_count = 0;
1623
1624 VKAPI_ATTR VkResult VKAPI_CALL
1625 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1626 const VkAllocationCallbacks *pAllocator,
1627 VkInstance *pInstance)
1628 {
1629 struct tu_instance *instance;
1630 VkResult result;
1631
1632 tu_env_init();
1633
1634 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1635
1636 if (pAllocator == NULL)
1637 pAllocator = vk_default_allocator();
1638
1639 instance = (struct tu_instance *) vk_zalloc(
1640 pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1641
1642 if (!instance)
1643 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1644
1645 struct vk_instance_dispatch_table dispatch_table;
1646 vk_instance_dispatch_table_from_entrypoints(
1647 &dispatch_table, &tu_instance_entrypoints, true);
1648 vk_instance_dispatch_table_from_entrypoints(
1649 &dispatch_table, &wsi_instance_entrypoints, false);
1650
1651 result = vk_instance_init(&instance->vk,
1652 &tu_instance_extensions_supported,
1653 &dispatch_table,
1654 pCreateInfo, pAllocator);
1655 if (result != VK_SUCCESS) {
1656 vk_free(pAllocator, instance);
1657 return vk_error(NULL, result);
1658 }
1659
1660 instance->vk.physical_devices.try_create_for_drm =
1661 tu_physical_device_try_create;
1662 instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1663 instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1664
1665 instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
1666 if (TU_DEBUG(STARTUP))
1667 mesa_logi("Created an instance");
1668
1669 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1670
1671 tu_init_dri_options(instance);
1672
1673 *pInstance = tu_instance_to_handle(instance);
1674
1675 #ifdef HAVE_PERFETTO
1676 tu_perfetto_init();
1677 #endif
1678
1679 util_gpuvis_init();
1680
1681 return VK_SUCCESS;
1682 }
1683
1684 VKAPI_ATTR void VKAPI_CALL
1685 tu_DestroyInstance(VkInstance _instance,
1686 const VkAllocationCallbacks *pAllocator)
1687 {
1688 VK_FROM_HANDLE(tu_instance, instance, _instance);
1689
1690 if (!instance)
1691 return;
1692
1693 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1694
1695 driDestroyOptionCache(&instance->dri_options);
1696 driDestroyOptionInfo(&instance->available_dri_options);
1697
1698 vk_instance_finish(&instance->vk);
1699 vk_free(&instance->vk.alloc, instance);
1700 }
1701
1702 static const VkQueueFamilyProperties tu_queue_family_properties = {
1703 .queueFlags =
1704 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1705 .queueCount = 1,
1706 .timestampValidBits = 48,
1707 .minImageTransferGranularity = { 1, 1, 1 },
1708 };
1709
1710 void
1711 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1712 VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1713 {
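/* The kernel may expose more submit-queue priority levels, but at most three
 * of them are mapped onto the low/medium/high global priorities defined by
 * the extension.
 */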
1714 props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1715 switch (props->priorityCount) {
1716 case 1:
1717 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1718 break;
1719 case 2:
1720 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1721 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1722 break;
1723 case 3:
1724 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1725 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1726 props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1727 break;
1728 default:
1729 unreachable("unexpected priority count");
1730 break;
1731 }
1732 }
1733
1734 VKAPI_ATTR void VKAPI_CALL
1735 tu_GetPhysicalDeviceQueueFamilyProperties2(
1736 VkPhysicalDevice physicalDevice,
1737 uint32_t *pQueueFamilyPropertyCount,
1738 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1739 {
1740 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1741
1742 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1743 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1744
1745 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1746 {
1747 p->queueFamilyProperties = tu_queue_family_properties;
1748
1749 vk_foreach_struct(ext, p->pNext) {
1750 switch (ext->sType) {
1751 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1752 VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1753 (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1754 tu_physical_device_get_global_priority_properties(pdevice, props);
1755 break;
1756 }
1757 default:
1758 break;
1759 }
1760 }
1761 }
1762 }
1763
1764 uint64_t
1765 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1766 {
1767 uint64_t total_ram = 0;
1768 ASSERTED bool has_physical_memory =
1769 os_get_total_physical_memory(&total_ram);
1770 assert(has_physical_memory);
1771
1772 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
1773 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1774 */
1775 uint64_t available_ram;
1776 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1777 available_ram = total_ram / 2;
1778 else
1779 available_ram = total_ram * 3 / 4;
1780
1781 if (physical_device->va_size)
1782 available_ram = MIN2(available_ram, physical_device->va_size);
1783
1784 return available_ram;
1785 }
1786
1787 static VkDeviceSize
1788 tu_get_budget_memory(struct tu_physical_device *physical_device)
1789 {
1790 uint64_t heap_size = physical_device->heap.size;
1791 uint64_t heap_used = physical_device->heap.used;
1792 uint64_t sys_available;
1793 ASSERTED bool has_available_memory =
1794 os_get_available_system_memory(&sys_available);
1795 assert(has_available_memory);
1796
1797 if (physical_device->va_size)
1798 sys_available = MIN2(sys_available, physical_device->va_size);
1799
1800 /*
1801 * Let's not incite the app to starve the system: report at most 90% of
1802 * available system memory.
1803 */
1804 uint64_t heap_available = sys_available * 9 / 10;
1805 return MIN2(heap_size, heap_used + heap_available);
1806 }
1807
1808 VKAPI_ATTR void VKAPI_CALL
1809 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1810 VkPhysicalDeviceMemoryProperties2 *props2)
1811 {
1812 VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1813
1814 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1815 props->memoryHeapCount = 1;
1816 props->memoryHeaps[0].size = physical_device->heap.size;
1817 props->memoryHeaps[0].flags = physical_device->heap.flags;
1818
1819 props->memoryTypeCount = physical_device->memory.type_count;
1820 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1821 props->memoryTypes[i] = (VkMemoryType) {
1822 .propertyFlags = physical_device->memory.types[i],
1823 .heapIndex = 0,
1824 };
1825 }
1826
1827 vk_foreach_struct(ext, props2->pNext)
1828 {
1829 switch (ext->sType) {
1830 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1831 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1832 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1833 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1834 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1835
1836 /* The heapBudget and heapUsage values must be zero for array elements
1837 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1838 */
1839 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1840 memory_budget_props->heapBudget[i] = 0u;
1841 memory_budget_props->heapUsage[i] = 0u;
1842 }
1843 break;
1844 }
1845 default:
1846 break;
1847 }
1848 }
1849 }
1850
1851 VKAPI_ATTR VkResult VKAPI_CALL
1852 tu_GetPhysicalDeviceFragmentShadingRatesKHR(
1853 VkPhysicalDevice physicalDevice,
1854 uint32_t *pFragmentShadingRateCount,
1855 VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1856 {
1857 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1858 pFragmentShadingRates, pFragmentShadingRateCount);
1859
1860 #define append_rate(w, h, s) \
1861 { \
1862 VkPhysicalDeviceFragmentShadingRateKHR rate = { \
1863 .sType = \
1864 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
1865 .sampleCounts = s, \
1866 .fragmentSize = { .width = w, .height = h }, \
1867 }; \
1868 vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, \
1869 r) *r = rate; \
1870 }
1871
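/* The spec requires the rates to be ordered from largest to smallest fragment
 * size, and the mandatory 1x1 rate to report support for all sample counts.
 */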
1872 append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
1873 append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
1874 append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1875 append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1876 append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1877 append_rate(1, 1, ~0);
1878
1879 #undef append_rate
1880
1881 return vk_outarray_status(&out);
1882 }
1883
1884 uint64_t
1885 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1886 {
1887 /* This is based on the 19.2MHz always-on rbbm timer.
1888 *
1889 * TODO: we should probably query this value from the kernel.
1890 */
1891 return ts * (1000000000 / 19200000);
1892 }
1893
1894 struct u_trace_context *
1895 tu_device_get_u_trace(struct tu_device *device)
1896 {
1897 return &device->trace_context;
1898 }
1899
1900 static void*
1901 tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
1902 {
1903 struct tu_device *device =
1904 container_of(utctx, struct tu_device, trace_context);
1905
1906 struct tu_bo *bo;
1907 tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
1908 tu_bo_map(device, bo, NULL);
1909
1910 return bo;
1911 }
1912
1913 static void
1914 tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
1915 {
1916 struct tu_device *device =
1917 container_of(utctx, struct tu_device, trace_context);
1918 struct tu_bo *bo = (struct tu_bo *) timestamps;
1919
1920 tu_bo_finish(device, bo);
1921 }
1922
1923 template <chip CHIP>
1924 static void
1925 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1926 uint64_t offset_B, uint32_t)
1927 {
1928 struct tu_bo *bo = (struct tu_bo *) timestamps;
1929 struct tu_cs *ts_cs = (struct tu_cs *) cs;
1930
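/* Write the always-on timer value to the timestamp BO at offset_B via an
 * RB_DONE_TS event; the packet encoding differs between a6xx and a7xx.
 */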
1931 if (CHIP == A6XX) {
1932 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1933 tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1934 CP_EVENT_WRITE_0_TIMESTAMP);
1935 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1936 tu_cs_emit(ts_cs, 0x00000000);
1937 } else {
1938 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1939 tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1940 .write_src = EV_WRITE_ALWAYSON,
1941 .write_dst = EV_DST_RAM,
1942 .write_enabled = true)
1943 .value);
1944 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1945 }
1946 }
1947
1948 static uint64_t
1949 tu_trace_read_ts(struct u_trace_context *utctx,
1950 void *timestamps, uint64_t offset_B, void *flush_data)
1951 {
1952 struct tu_device *device =
1953 container_of(utctx, struct tu_device, trace_context);
1954 struct tu_bo *bo = (struct tu_bo *) timestamps;
1955 struct tu_u_trace_submission_data *submission_data =
1956 (struct tu_u_trace_submission_data *) flush_data;
1957
1958 /* Only need to stall on results for the first entry: */
1959 if (offset_B == 0) {
1960 tu_queue_wait_fence(submission_data->queue, submission_data->fence,
1961 1000000000);
1962 }
1963
1964 if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
1965 return U_TRACE_NO_TIMESTAMP;
1966 }
1967
1968 uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
1969
1970 /* Don't translate the no-timestamp marker: */
1971 if (*ts == U_TRACE_NO_TIMESTAMP)
1972 return U_TRACE_NO_TIMESTAMP;
1973
1974 return tu_device_ticks_to_ns(device, *ts);
1975 }
1976
1977 static void
1978 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1979 {
1980 struct tu_device *device =
1981 container_of(utctx, struct tu_device, trace_context);
1982 struct tu_u_trace_submission_data *submission_data =
1983 (struct tu_u_trace_submission_data *) flush_data;
1984
1985 tu_u_trace_submission_data_finish(device, submission_data);
1986 }
1987
1988 void
1989 tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
1990 void *ts_from, uint64_t from_offset_B,
1991 void *ts_to, uint64_t to_offset_B,
1992 uint64_t size_B)
1993 {
1994 struct tu_cs *cs = (struct tu_cs *) cmdstream;
1995 struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1996 struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1997
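/* CP_MEMCPY payload: dword count followed by the 64-bit source and
 * destination addresses.
 */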
1998 tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1999 tu_cs_emit(cs, size_B / sizeof(uint32_t));
2000 tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
2001 tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
2002 }
2003
2004 static void
2005 tu_trace_capture_data(struct u_trace *ut,
2006 void *cs,
2007 void *dst_buffer,
2008 uint64_t dst_offset_B,
2009 void *src_buffer,
2010 uint64_t src_offset_B,
2011 uint32_t size_B)
2012 {
2013 if (src_buffer)
2014 tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
2015 dst_offset_B, size_B);
2016 }
2017
2018 static const void *
2019 tu_trace_get_data(struct u_trace_context *utctx,
2020 void *buffer,
2021 uint64_t offset_B,
2022 uint32_t size_B)
2023 {
2024 struct tu_bo *bo = (struct tu_bo *) buffer;
2025 return (char *) bo->map + offset_B;
2026 }
2027
2028 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
2029 * that ignore tracepoints at the beginning/end that are part of a
2030 * suspend/resume chain.
2031 */
2032 static struct u_trace_iterator
2033 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
2034 {
2035 switch (cmdbuf->state.suspend_resume) {
2036 case SR_IN_PRE_CHAIN:
2037 return cmdbuf->trace_renderpass_end;
2038 case SR_AFTER_PRE_CHAIN:
2039 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
2040 return cmdbuf->pre_chain.trace_renderpass_end;
2041 default:
2042 return u_trace_begin_iterator(&cmdbuf->trace);
2043 }
2044 }
2045
2046 static struct u_trace_iterator
2047 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
2048 {
2049 switch (cmdbuf->state.suspend_resume) {
2050 case SR_IN_PRE_CHAIN:
2051 return cmdbuf->trace_renderpass_end;
2052 case SR_IN_CHAIN:
2053 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
2054 return cmdbuf->trace_renderpass_start;
2055 default:
2056 return u_trace_end_iterator(&cmdbuf->trace);
2057 }
2058 }
2059 VkResult
2060 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
2061 struct u_trace **trace_copy)
2062 {
2063 *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
2064 sizeof(struct tu_cs), 8,
2065 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2066
2067 if (*cs == NULL) {
2068 return VK_ERROR_OUT_OF_HOST_MEMORY;
2069 }
2070
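/* Rough size estimate for the copy stream: a couple of 6-dword CP_MEMCPY
 * packets per trace chunk plus a few dwords of WFI/WAIT_FOR_ME framing (an
 * interpretation of the expression below).
 */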
2071 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
2072 list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
2073
2074 tu_cs_begin(*cs);
2075
2076 tu_cs_emit_wfi(*cs);
2077 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
2078
2079 *trace_copy = (struct u_trace *) vk_zalloc(
2080 &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
2081 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2082
2083 if (*trace_copy == NULL) {
2084 return VK_ERROR_OUT_OF_HOST_MEMORY;
2085 }
2086
2087 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
2088 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
2089 tu_cmd_end_iterator(cmdbuf),
2090 *trace_copy, *cs,
2091 tu_copy_buffer);
2092
2093 tu_cs_emit_wfi(*cs);
2094
2095 tu_cs_end(*cs);
2096
2097 return VK_SUCCESS;
2098 }
2099
2100 VkResult
2101 tu_u_trace_submission_data_create(
2102 struct tu_device *device,
2103 struct tu_cmd_buffer **cmd_buffers,
2104 uint32_t cmd_buffer_count,
2105 struct tu_u_trace_submission_data **submission_data)
2106 {
2107 *submission_data = (struct tu_u_trace_submission_data *)
2108 vk_zalloc(&device->vk.alloc,
2109 sizeof(struct tu_u_trace_submission_data), 8,
2110 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2111
2112 if (!(*submission_data)) {
2113 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2114 }
2115
2116 struct tu_u_trace_submission_data *data = *submission_data;
2117
2118 data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
2119 &device->vk.alloc,
2120 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
2121 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2122
2123 if (!data->cmd_trace_data) {
2124 goto fail;
2125 }
2126
2127 data->cmd_buffer_count = cmd_buffer_count;
2128 data->last_buffer_with_tracepoints = -1;
2129
2130 for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
2131 struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
2132
2133 if (!u_trace_has_points(&cmdbuf->trace))
2134 continue;
2135
2136 data->last_buffer_with_tracepoints = i;
2137
2138 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
2139 /* A single command buffer could be submitted several times, but we
2140 * already baked timestamp iova addresses and trace points are
2141 * single-use. Therefore we have to copy trace points and create
2142 * a new timestamp buffer on every submit of a reusable command buffer.
2143 */
2144 if (tu_create_copy_timestamp_cs(cmdbuf,
2145 &data->cmd_trace_data[i].timestamp_copy_cs,
2146 &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
2147 goto fail;
2148 }
2149
2150 assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
2151 } else {
2152 data->cmd_trace_data[i].trace = &cmdbuf->trace;
2153 }
2154 }
2155
2156 assert(data->last_buffer_with_tracepoints != -1);
2157
2158 return VK_SUCCESS;
2159
2160 fail:
2161 tu_u_trace_submission_data_finish(device, data);
2162 *submission_data = NULL;
2163
2164 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2165 }
2166
2167 void
2168 tu_u_trace_submission_data_finish(
2169 struct tu_device *device,
2170 struct tu_u_trace_submission_data *submission_data)
2171 {
2172 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
2173 /* Only free the trace if we had to create a copy of it */
2174 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
2175 if (cmd_data->timestamp_copy_cs) {
2176 tu_cs_finish(cmd_data->timestamp_copy_cs);
2177 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
2178
2179 u_trace_fini(cmd_data->trace);
2180 vk_free(&device->vk.alloc, cmd_data->trace);
2181 }
2182 }
2183
2184 if (submission_data->kgsl_timestamp_bo.bo) {
2185 mtx_lock(&device->kgsl_profiling_mutex);
2186 tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
2187 &submission_data->kgsl_timestamp_bo);
2188 mtx_unlock(&device->kgsl_profiling_mutex);
2189 }
2190
2191 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
2192 vk_free(&device->vk.alloc, submission_data);
2193 }
2194
2195 enum tu_reg_stomper_flags
2196 {
2197 TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
2198 TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
2199 TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
2200 };
2201
2202 /* See freedreno.rst for usage tips */
2203 static const struct debug_named_value tu_reg_stomper_options[] = {
2204 { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
2205 "By default the range specifies the regs to stomp, with 'inverse' it "
2206 "specifies the regs NOT to stomp" },
2207 { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
2208 "Stomp regs at the start of a cmdbuf" },
2209 { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
2210 "Stomp regs before a renderpass" },
2211 { NULL, 0 }
2212 };
2213
2214 template <chip CHIP>
2215 static inline void
2216 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
2217 bool is_rp_blit,
2218 uint32_t first_reg,
2219 uint32_t last_reg,
2220 bool inverse)
2221 {
2222 const uint16_t *regs = NULL;
2223 size_t count = 0;
2224
2225 if (is_rp_blit) {
2226 regs = &RP_BLIT_REGS<CHIP>[0];
2227 count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
2228 } else {
2229 regs = &CMD_REGS<CHIP>[0];
2230 count = ARRAY_SIZE(CMD_REGS<CHIP>);
2231 }
2232
2233 for (size_t i = 0; i < count; i++) {
2234 if (inverse) {
2235 if (regs[i] >= first_reg && regs[i] <= last_reg)
2236 continue;
2237 } else {
2238 if (regs[i] < first_reg || regs[i] > last_reg)
2239 continue;
2240 }
2241
2242 if (fd_reg_stomp_allowed(CHIP, regs[i]))
2243 tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
2244 }
2245 }
2246
2247 static void
2248 tu_init_dbg_reg_stomper(struct tu_device *device)
2249 {
2250 const char *stale_reg_range_str =
2251 os_get_option("TU_DEBUG_STALE_REGS_RANGE");
2252 if (!stale_reg_range_str)
2253 return;
2254
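/* The expected format is two comma-separated hex register offsets, e.g.
 * TU_DEBUG_STALE_REGS_RANGE=0x8000,0x8fff (example values).
 */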
2255 uint32_t first_reg, last_reg;
2256
2257 if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
2258 mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
2259 return;
2260 }
2261
2262 uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
2263 tu_reg_stomper_options,
2264 TU_DEBUG_REG_STOMP_CMDBUF);
2265
2266 bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
2267
2268 if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) {
2269 struct tu_cs *cmdbuf_cs =
2270 (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2271 tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
2272 "cmdbuf reg stomp cs");
2273 tu_cs_begin(cmdbuf_cs);
2274
2275 TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
2276 tu_cs_end(cmdbuf_cs);
2277 device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
2278 }
2279
2280 if (debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) {
2281 struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2282 tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
2283 tu_cs_begin(rp_cs);
2284
2285 TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
2286 tu_cs_end(rp_cs);
2287
2288 device->dbg_renderpass_stomp_cs = rp_cs;
2289 }
2290 }
2291
2292 /* It is unknown what this workaround is for and what it fixes. */
2293 static VkResult
2294 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
2295 {
2296 struct tu_cs shader_cs;
2297 tu_cs_begin_sub_stream(&device->sub_cs, 10, &shader_cs);
2298
2299 uint32_t raw_shader[] = {
2300 0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2301 0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2302 0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2303 0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2304 0x00000000, 0x03000000, // end
2305 };
2306
2307 tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2308 struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(&device->sub_cs, &shader_cs);
2309 uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2310
2311 struct tu_cs sub_cs;
2312 tu_cs_begin_sub_stream(&device->sub_cs, 47, &sub_cs);
2313
2314 tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2315 .vs_state = true, .hs_state = true, .ds_state = true,
2316 .gs_state = true, .fs_state = true, .gfx_ibo = true,
2317 .cs_bindless = 0xff, .gfx_bindless = 0xff));
2318 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2319 .constlen = 4,
2320 .enabled = true));
2321 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2322 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2323 .threadmode = MULTI,
2324 .threadsize = THREAD128,
2325 .mergedregs = true));
2326 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2327 tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2328 HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2329 HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2330 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2331 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2332 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2333 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
2334 .linearlocalidregid = regid(63, 0),
2335 .threadsize = THREAD128,
2336 .workgrouprastorderzfirsten = true,
2337 .wgtilewidth = 4,
2338 .wgtileheight = 17));
2339 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2340 .wgidconstid = regid(51, 3),
2341 .wgsizeconstid = regid(48, 0),
2342 .wgoffsetconstid = regid(63, 0),
2343 .localidregid = regid(63, 0)));
2344 tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2345 .linearlocalidregid = regid(63, 0),
2346 .threadsize = THREAD128,
2347 .workitemrastorder = WORKITEMRASTORDER_TILED));
2348 tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2349
2350 tu_cs_emit_regs(&sub_cs,
2351 HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2352 .localsizex = 255,
2353 .localsizey = 1,
2354 .localsizez = 1),
2355 HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2356 HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2357 HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2358 HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2359 HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2360 HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2361 tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LAST_LOCAL_SIZE(
2362 .localsizex = 255,
2363 .localsizey = 0,
2364 .localsizez = 0));
2365 tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2366 tu_cs_emit(&sub_cs, 0);
2367 tu_cs_emit_qw(&sub_cs, shader_iova);
2368
2369 tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2370 tu_cs_emit(&sub_cs, 0x00000000);
2371 tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2372 tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2373 tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2374
2375 device->cmdbuf_start_a725_quirk_entry =
2376 tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2377
2378 return VK_SUCCESS;
2379 }
2380
2381 static VkResult
2382 tu_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2383 {
2384 struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
2385 const int ret = tu_device_get_gpu_timestamp(dev, timestamp);
2386 return ret == 0 ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2387 }
2388
2389 VKAPI_ATTR VkResult VKAPI_CALL
2390 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2391 const VkDeviceCreateInfo *pCreateInfo,
2392 const VkAllocationCallbacks *pAllocator,
2393 VkDevice *pDevice)
2394 {
2395 VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2396 VkResult result;
2397 struct tu_device *device;
2398 bool border_color_without_format = false;
2399
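/* Peek at the requested features before device init: whether custom border
 * colors without a format are enabled decides the d24s8 border color
 * workaround selection further down.
 */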
2400 vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2401 switch (ext->sType) {
2402 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2403 border_color_without_format =
2404 ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2405 ->customBorderColorWithoutFormat;
2406 break;
2407 default:
2408 break;
2409 }
2410 }
2411
2412 device = (struct tu_device *) vk_zalloc2(
2413 &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2414 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2415 if (!device)
2416 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2417
2418 struct vk_device_dispatch_table dispatch_table;
2419 bool override_initial_entrypoints = true;
2420
2421 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2422 vk_device_dispatch_table_from_entrypoints(
2423 &dispatch_table, &tu_rmv_device_entrypoints, true);
2424 override_initial_entrypoints = false;
2425 }
2426
2427 vk_device_dispatch_table_from_entrypoints(
2428 &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2429
2430 switch (fd_dev_gen(&physical_device->dev_id)) {
2431 case 6:
2432 vk_device_dispatch_table_from_entrypoints(
2433 &dispatch_table, &tu_device_entrypoints_a6xx, false);
2434 break;
2435 case 7:
2436 vk_device_dispatch_table_from_entrypoints(
2437 &dispatch_table, &tu_device_entrypoints_a7xx, false);
2438 }
2439
2440 vk_device_dispatch_table_from_entrypoints(
2441 &dispatch_table, &wsi_device_entrypoints, false);
2442
2443 const struct vk_device_entrypoint_table *knl_device_entrypoints =
2444 physical_device->instance->knl->device_entrypoints;
2445 if (knl_device_entrypoints) {
2446 vk_device_dispatch_table_from_entrypoints(
2447 &dispatch_table, knl_device_entrypoints, false);
2448 }
2449
2450 result = vk_device_init(&device->vk, &physical_device->vk,
2451 &dispatch_table, pCreateInfo, pAllocator);
2452 if (result != VK_SUCCESS) {
2453 vk_free(&device->vk.alloc, device);
2454 return vk_startup_errorf(physical_device->instance, result,
2455 "vk_device_init failed");
2456 }
2457
2458 device->instance = physical_device->instance;
2459 device->physical_device = physical_device;
2460 device->device_idx = device->physical_device->device_count++;
2461
2462 result = tu_drm_device_init(device);
2463 if (result != VK_SUCCESS) {
2464 vk_free(&device->vk.alloc, device);
2465 return result;
2466 }
2467
2468 device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2469 device->vk.as_build_ops = &tu_as_build_ops;
2470 device->vk.check_status = tu_device_check_status;
2471 device->vk.get_timestamp = tu_device_get_timestamp;
2472
2473 mtx_init(&device->bo_mutex, mtx_plain);
2474 mtx_init(&device->pipeline_mutex, mtx_plain);
2475 mtx_init(&device->autotune_mutex, mtx_plain);
2476 mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2477 u_rwlock_init(&device->dma_bo_lock);
2478 pthread_mutex_init(&device->submit_mutex, NULL);
2479
2480 if (physical_device->has_set_iova) {
2481 mtx_init(&device->vma_mutex, mtx_plain);
2482 util_vma_heap_init(&device->vma, physical_device->va_start,
2483 ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2484 }
2485
2486 if (TU_DEBUG(BOS))
2487 device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2488
2489 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2490 tu_memory_trace_init(device);
2491
2492 /* kgsl is not a drm device: */
2493 if (!is_kgsl(physical_device->instance))
2494 vk_device_set_drm_fd(&device->vk, device->fd);
2495
2496 struct tu6_global *global = NULL;
2497 uint32_t global_size = sizeof(struct tu6_global);
2498 struct vk_pipeline_cache_create_info pcc_info = { };
2499
2500 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2501 const VkDeviceQueueCreateInfo *queue_create =
2502 &pCreateInfo->pQueueCreateInfos[i];
2503 uint32_t qfi = queue_create->queueFamilyIndex;
2504 device->queues[qfi] = (struct tu_queue *) vk_alloc(
2505 &device->vk.alloc,
2506 queue_create->queueCount * sizeof(struct tu_queue), 8,
2507 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2508 if (!device->queues[qfi]) {
2509 result = vk_startup_errorf(physical_device->instance,
2510 VK_ERROR_OUT_OF_HOST_MEMORY,
2511 "OOM");
2512 goto fail_queues;
2513 }
2514
2515 memset(device->queues[qfi], 0,
2516 queue_create->queueCount * sizeof(struct tu_queue));
2517
2518 device->queue_count[qfi] = queue_create->queueCount;
2519
2520 for (unsigned q = 0; q < queue_create->queueCount; q++) {
2521 result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create);
2522 if (result != VK_SUCCESS) {
2523 device->queue_count[qfi] = q;
2524 goto fail_queues;
2525 }
2526 }
2527 }
2528
2529 result = vk_meta_device_init(&device->vk, &device->meta);
2530 if (result != VK_SUCCESS)
2531 goto fail_queues;
2532
2533 util_sparse_array_init(&device->accel_struct_ranges, sizeof(VkDeviceSize), 256);
2534
2535 mtx_init(&device->radix_sort_mutex, mtx_plain);
2536
2537 {
2538 struct ir3_compiler_options ir3_options = {
2539 .push_ubo_with_preamble = true,
2540 .disable_cache = true,
2541 .bindless_fb_read_descriptor = -1,
2542 .bindless_fb_read_slot = -1,
2543 .storage_16bit = physical_device->info->a6xx.storage_16bit,
2544 .storage_8bit = physical_device->info->a7xx.storage_8bit,
2545 .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2546 };
2547 device->compiler = ir3_compiler_create(
2548 NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2549 }
2550 if (!device->compiler) {
2551 result = vk_startup_errorf(physical_device->instance,
2552 VK_ERROR_INITIALIZATION_FAILED,
2553 "failed to initialize ir3 compiler");
2554 goto fail_compiler;
2555 }
2556
2557 /* Initialize sparse array for refcounting imported BOs */
2558 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2559
2560 if (physical_device->has_set_iova) {
2561 STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2562 if (!u_vector_init(&device->zombie_vmas, 64,
2563 sizeof(struct tu_zombie_vma))) {
2564 result = vk_startup_errorf(physical_device->instance,
2565 VK_ERROR_INITIALIZATION_FAILED,
2566 "zombie_vmas create failed");
2567 goto fail_free_zombie_vma;
2568 }
2569 }
2570
2571 /* initial sizes, these will increase if there is overflow */
2572 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2573 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2574
2575 if (device->vk.enabled_features.customBorderColors)
2576 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2577
2578 tu_bo_suballocator_init(
2579 &device->pipeline_suballoc, device, 128 * 1024,
2580 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
2581 TU_BO_ALLOC_ALLOW_DUMP |
2582 TU_BO_ALLOC_INTERNAL_RESOURCE),
2583 "pipeline_suballoc");
2584 tu_bo_suballocator_init(&device->autotune_suballoc, device,
2585 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2586 "autotune_suballoc");
2587 if (is_kgsl(physical_device->instance)) {
2588 tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2589 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2590 "kgsl_profiling_suballoc");
2591 }
2592
2593 result = tu_bo_init_new(
2594 device, NULL, &device->global_bo, global_size,
2595 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
2596 TU_BO_ALLOC_INTERNAL_RESOURCE),
2597 "global");
2598 if (result != VK_SUCCESS) {
2599 vk_startup_errorf(device->instance, result, "BO init");
2600 goto fail_global_bo;
2601 }
2602
2603 result = tu_bo_map(device, device->global_bo, NULL);
2604 if (result != VK_SUCCESS) {
2605 vk_startup_errorf(device->instance, result, "BO map");
2606 goto fail_global_bo_map;
2607 }
2608
2609 global = (struct tu6_global *)device->global_bo->map;
2610 device->global_bo_map = global;
2611 tu_init_clear_blit_shaders(device);
2612
2613 if (device->vk.enabled_features.accelerationStructure &&
2614 device->vk.enabled_features.nullDescriptor) {
2615 result = tu_init_null_accel_struct(device);
2616 if (result != VK_SUCCESS) {
2617 vk_startup_errorf(device->instance, result, "null acceleration struct");
2618 goto fail_null_accel_struct;
2619 }
2620 }
2621
2622 result = tu_init_empty_shaders(device);
2623 if (result != VK_SUCCESS) {
2624 vk_startup_errorf(device->instance, result, "empty shaders");
2625 goto fail_empty_shaders;
2626 }
2627
2628 global->predicate = 0;
2629 global->vtx_stats_query_not_running = 1;
2630 global->dbg_one = (uint32_t)-1;
2631 global->dbg_gmem_total_loads = 0;
2632 global->dbg_gmem_taken_loads = 0;
2633 global->dbg_gmem_total_stores = 0;
2634 global->dbg_gmem_taken_stores = 0;
2635 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2636 VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2637 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2638 vk_border_color_is_int((VkBorderColor) i));
2639 }
2640
2641 /* initialize to ones so ffs can be used to find unused slots */
2642 BITSET_ONES(device->custom_border_color);
2643
2644 result = tu_init_dynamic_rendering(device);
2645 if (result != VK_SUCCESS) {
2646 vk_startup_errorf(device->instance, result, "dynamic rendering");
2647 goto fail_dynamic_rendering;
2648 }
2649
2650 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2651 NULL);
2652 if (!device->mem_cache) {
2653 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2654 vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2655 goto fail_pipeline_cache;
2656 }
2657
2658 tu_cs_init(&device->sub_cs, device, TU_CS_MODE_SUB_STREAM, 1024, "device sub cs");
2659
2660 if (device->vk.enabled_features.performanceCounterQueryPools) {
2661 /* Prepare command streams that set the pass index in PERF_CNTRS_REG to
2662 * each value from 0 to 31. One of them will be picked up at command
2663 * submit time when the perf query is executed.
2664 */
2665
2666 device->perfcntrs_pass_cs_entries =
2667 (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2668 if (!device->perfcntrs_pass_cs_entries) {
2669 result = vk_startup_errorf(device->instance,
2670 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2671 goto fail_perfcntrs_pass_entries_alloc;
2672 }
2673
2674 for (unsigned i = 0; i < 32; i++) {
2675 struct tu_cs sub_cs;
2676
2677 result = tu_cs_begin_sub_stream(&device->sub_cs, 3, &sub_cs);
2678 if (result != VK_SUCCESS) {
2679 vk_startup_errorf(device->instance, result,
2680 "failed to allocate commands streams");
2681 goto fail_prepare_perfcntrs_pass_cs;
2682 }
2683
2684 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2685 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2686
2687 device->perfcntrs_pass_cs_entries[i] =
2688 tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2689 }
2690 }
2691
2692 result = tu_init_bin_preamble(device);
2693 if (result != VK_SUCCESS)
2694 goto fail_bin_preamble;
2695
2696 if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2697 result = tu_init_cmdbuf_start_a725_quirk(device);
2698 if (result != VK_SUCCESS)
2699 goto fail_a725_workaround;
2700 }
2701
2702 tu_init_dbg_reg_stomper(device);
2703
2704 /* Initialize a condition variable for timeline semaphore */
2705 pthread_condattr_t condattr;
2706 if (pthread_condattr_init(&condattr) != 0) {
2707 result = vk_startup_errorf(physical_device->instance,
2708 VK_ERROR_INITIALIZATION_FAILED,
2709 "pthread condattr init");
2710 goto fail_timeline_cond;
2711 }
2712 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2713 pthread_condattr_destroy(&condattr);
2714 result = vk_startup_errorf(physical_device->instance,
2715 VK_ERROR_INITIALIZATION_FAILED,
2716 "pthread condattr clock setup");
2717 goto fail_timeline_cond;
2718 }
2719 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2720 pthread_condattr_destroy(&condattr);
2721 result = vk_startup_errorf(physical_device->instance,
2722 VK_ERROR_INITIALIZATION_FAILED,
2723 "pthread cond init");
2724 goto fail_timeline_cond;
2725 }
2726 pthread_condattr_destroy(&condattr);
2727
2728 result = tu_autotune_init(&device->autotune, device);
2729 if (result != VK_SUCCESS) {
2730 goto fail_timeline_cond;
2731 }
2732
2733 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2734 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2735
2736 mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2737 mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2738
2739 mtx_init(&device->mutex, mtx_plain);
2740
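/* Z24_UINT_S8_UINT interacts badly with custom border colors when no format
 * is provided, so only use it when the app doesn't need that or the user has
 * disabled the workaround (our understanding of the condition below).
 */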
2741 device->use_z24uint_s8uint =
2742 physical_device->info->a6xx.has_z24uint_s8uint &&
2743 (!border_color_without_format ||
2744 physical_device->instance->disable_d24s8_border_color_workaround);
2745 device->use_lrz = !TU_DEBUG_ENV(NOLRZ);
2746
2747 tu_gpu_tracepoint_config_variable();
2748
2749 device->submit_count = 0;
2750 u_trace_context_init(&device->trace_context, device,
2751 sizeof(uint64_t),
2752 12,
2753 tu_trace_create_buffer,
2754 tu_trace_destroy_buffer,
2755 TU_CALLX(device, tu_trace_record_ts),
2756 tu_trace_read_ts,
2757 tu_trace_capture_data,
2758 tu_trace_get_data,
2759 tu_trace_delete_flush_data);
2760
2761 tu_breadcrumbs_init(device);
2762
2763 if (FD_RD_DUMP(ENABLE)) {
2764 struct vk_app_info *app_info = &device->instance->vk.app_info;
2765 const char *app_name_str = app_info->app_name ?
2766 app_info->app_name : util_get_process_name();
2767 const char *engine_name_str = app_info->engine_name ?
2768 app_info->engine_name : "unknown-engine";
2769
2770 char app_name[64];
2771 snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2772
2773 char engine_name[32];
2774 snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2775
2776 char output_name[128];
2777 snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
2778 app_name, engine_name, device->instance->instance_idx,
2779 device->device_idx);
2780
2781 fd_rd_output_init(&device->rd_output, output_name);
2782 }
2783
2784 device->vk.cmd_dispatch_unaligned = tu_dispatch_unaligned;
2785 device->vk.write_buffer_cp = tu_write_buffer_cp;
2786 device->vk.flush_buffer_write_cp = tu_flush_buffer_write_cp;
2787 device->vk.cmd_fill_buffer_addr = tu_cmd_fill_buffer_addr;
2788
2789 *pDevice = tu_device_to_handle(device);
2790 return VK_SUCCESS;
2791
2792 fail_timeline_cond:
2793 fail_a725_workaround:
2794 fail_bin_preamble:
2795 fail_prepare_perfcntrs_pass_cs:
2796 free(device->perfcntrs_pass_cs_entries);
2797 fail_perfcntrs_pass_entries_alloc:
2798 tu_cs_finish(&device->sub_cs);
2799 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2800 fail_pipeline_cache:
2801 tu_destroy_dynamic_rendering(device);
2802 fail_dynamic_rendering:
2803 tu_destroy_empty_shaders(device);
2804 fail_empty_shaders:
2805 if (device->null_accel_struct_bo)
2806 tu_bo_finish(device, device->null_accel_struct_bo);
2807 fail_null_accel_struct:
2808 tu_destroy_clear_blit_shaders(device);
2809 fail_global_bo_map:
2810 TU_RMV(resource_destroy, device, device->global_bo);
2811 tu_bo_finish(device, device->global_bo);
2812 vk_free(&device->vk.alloc, device->submit_bo_list);
2813 util_dynarray_fini(&device->dump_bo_list);
2814 fail_global_bo:
2815 if (physical_device->has_set_iova)
2816 util_vma_heap_finish(&device->vma);
2817 fail_free_zombie_vma:
2818 util_sparse_array_finish(&device->bo_map);
2819 u_vector_finish(&device->zombie_vmas);
2820 ir3_compiler_destroy(device->compiler);
2821 fail_compiler:
2822 util_sparse_array_finish(&device->accel_struct_ranges);
2823 vk_meta_device_finish(&device->vk, &device->meta);
2824 fail_queues:
2825 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2826 for (unsigned q = 0; q < device->queue_count[i]; q++)
2827 tu_queue_finish(&device->queues[i][q]);
2828 if (device->queues[i])
2829 vk_free(&device->vk.alloc, device->queues[i]);
2830 }
2831
2832 u_rwlock_destroy(&device->dma_bo_lock);
2833 tu_drm_device_finish(device);
2834 vk_device_finish(&device->vk);
2835 vk_free(&device->vk.alloc, device);
2836 return result;
2837 }
2838
2839 VKAPI_ATTR void VKAPI_CALL
2840 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2841 {
2842 VK_FROM_HANDLE(tu_device, device, _device);
2843
2844 if (!device)
2845 return;
2846
2847 tu_memory_trace_finish(device);
2848
2849 if (FD_RD_DUMP(ENABLE))
2850 fd_rd_output_fini(&device->rd_output);
2851
2852 tu_breadcrumbs_finish(device);
2853
2854 u_trace_context_fini(&device->trace_context);
2855
2856 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2857 if (device->scratch_bos[i].initialized)
2858 tu_bo_finish(device, device->scratch_bos[i].bo);
2859 }
2860
2861 if (device->fiber_pvtmem_bo.bo)
2862 tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2863
2864 if (device->wave_pvtmem_bo.bo)
2865 tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2866
2867 tu_destroy_clear_blit_shaders(device);
2868
2869 tu_destroy_empty_shaders(device);
2870
2871 tu_destroy_dynamic_rendering(device);
2872
2873 vk_meta_device_finish(&device->vk, &device->meta);
2874
2875 util_sparse_array_finish(&device->accel_struct_ranges);
2876
2877 ir3_compiler_destroy(device->compiler);
2878
2879 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2880
2881 tu_cs_finish(&device->sub_cs);
2882
2883 if (device->perfcntrs_pass_cs_entries) {
2884 free(device->perfcntrs_pass_cs_entries);
2885 }
2886
2887 if (device->dbg_cmdbuf_stomp_cs) {
2888 tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2889 free(device->dbg_cmdbuf_stomp_cs);
2890 }
2891
2892 if (device->dbg_renderpass_stomp_cs) {
2893 tu_cs_finish(device->dbg_renderpass_stomp_cs);
2894 free(device->dbg_renderpass_stomp_cs);
2895 }
2896
2897 tu_autotune_fini(&device->autotune, device);
2898
2899 tu_bo_suballocator_finish(&device->pipeline_suballoc);
2900 tu_bo_suballocator_finish(&device->autotune_suballoc);
2901 tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2902
2903 tu_bo_finish(device, device->global_bo);
2904
2905 if (device->null_accel_struct_bo)
2906 tu_bo_finish(device, device->null_accel_struct_bo);
2907
2908 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2909 for (unsigned q = 0; q < device->queue_count[i]; q++)
2910 tu_queue_finish(&device->queues[i][q]);
2911 if (device->queue_count[i])
2912 vk_free(&device->vk.alloc, device->queues[i]);
2913 }
2914
2915 tu_drm_device_finish(device);
2916
2917 if (device->physical_device->has_set_iova)
2918 util_vma_heap_finish(&device->vma);
2919
2920 util_sparse_array_finish(&device->bo_map);
2921 u_rwlock_destroy(&device->dma_bo_lock);
2922
2923 u_vector_finish(&device->zombie_vmas);
2924
2925 pthread_cond_destroy(&device->timeline_cond);
2926 _mesa_hash_table_destroy(device->bo_sizes, NULL);
2927 vk_free(&device->vk.alloc, device->submit_bo_list);
2928 util_dynarray_fini(&device->dump_bo_list);
2929 vk_device_finish(&device->vk);
2930 vk_free(&device->vk.alloc, device);
2931 }
2932
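/* Return a scratch BO at least `size` bytes large, allocating it lazily on
 * first use. Scratch BOs live in power-of-two size buckets and are only
 * freed at device destruction, so callers may keep the returned pointer for
 * the lifetime of the device.
 */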
2933 VkResult
2934 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2935 {
2936 unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2937 unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2938 assert(index < ARRAY_SIZE(dev->scratch_bos));
2939
2940 for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2941 if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2942 /* Fast path: just return the already-allocated BO. */
2943 *bo = dev->scratch_bos[i].bo;
2944 return VK_SUCCESS;
2945 }
2946 }
2947
2948 /* Slow path: actually allocate the BO. We take a per-size lock so that
2949 * allocating it only blocks other threads that need a scratch BO of the
2950 * same size, rather than stalling unrelated work.
2951 */
2952 mtx_lock(&dev->scratch_bos[index].construct_mtx);
2953
2954 /* Another thread may have allocated it already while we were waiting on
2955 * the lock. We need to check this in order to avoid double-allocating.
2956 */
2957 if (dev->scratch_bos[index].initialized) {
2958 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2959 *bo = dev->scratch_bos[index].bo;
2960 return VK_SUCCESS;
2961 }
2962
2963 unsigned bo_size = 1ull << size_log2;
2964 VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
2965 TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
2966 if (result != VK_SUCCESS) {
2967 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2968 return result;
2969 }
2970
2971 p_atomic_set(&dev->scratch_bos[index].initialized, true);
2972
2973 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2974
2975 *bo = dev->scratch_bos[index].bo;
2976 return VK_SUCCESS;
2977 }
2978
2979 VKAPI_ATTR VkResult VKAPI_CALL
2980 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2981 VkLayerProperties *pProperties)
2982 {
2983 *pPropertyCount = 0;
2984 return VK_SUCCESS;
2985 }
2986
2987 VKAPI_ATTR VkResult VKAPI_CALL
2988 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2989 uint32_t *pPropertyCount,
2990 VkExtensionProperties *pProperties)
2991 {
2992 if (pLayerName)
2993 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2994
2995 return vk_enumerate_instance_extension_properties(
2996 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2997 }
2998
2999 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
3000 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
3001 {
3002 VK_FROM_HANDLE(tu_instance, instance, _instance);
3003 return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
3004 &tu_instance_entrypoints,
3005 pName);
3006 }
3007
3008 /* The loader wants us to expose a second GetInstanceProcAddr function
3009 * to work around certain LD_PRELOAD issues seen in apps.
3010 */
3011 PUBLIC
3012 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
3013 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
3014 {
3015 return tu_GetInstanceProcAddr(instance, pName);
3016 }
3017
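/* vkAllocateMemory: a device memory object wraps exactly one tu_bo. There
 * are three paths: importing an existing opaque/dma-buf fd, importing an
 * AHardwareBuffer on Android, or allocating a fresh BO. Heap usage is
 * tracked so we can fail with VK_ERROR_OUT_OF_DEVICE_MEMORY once the
 * advertised heap size is exceeded.
 */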
3018 VKAPI_ATTR VkResult VKAPI_CALL
3019 tu_AllocateMemory(VkDevice _device,
3020 const VkMemoryAllocateInfo *pAllocateInfo,
3021 const VkAllocationCallbacks *pAllocator,
3022 VkDeviceMemory *pMem)
3023 {
3024 VK_FROM_HANDLE(tu_device, device, _device);
3025 struct tu_device_memory *mem;
3026 VkResult result;
3027
3028 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3029
3030 struct tu_memory_heap *mem_heap = &device->physical_device->heap;
3031 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3032 if (mem_heap_used > mem_heap->size)
3033 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3034
3035 mem = (struct tu_device_memory *) vk_device_memory_create(
3036 &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
3037 if (mem == NULL)
3038 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3039
3040 if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
3041 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3042 /* Apparently, this is allowed */
3043 *pMem = VK_NULL_HANDLE;
3044 return VK_SUCCESS;
3045 }
3046
3047 const VkImportMemoryFdInfoKHR *fd_info =
3048 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
3049
3050 if (fd_info && fd_info->handleType) {
3051 assert(fd_info->handleType ==
3052 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3053 fd_info->handleType ==
3054 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3055
3056 /*
3057 * TODO Importing the same fd twice gives us the same handle without
3058 * reference counting. We need to maintain a per-instance handle-to-bo
3059 * table and add reference count to tu_bo.
3060 */
3061 result = tu_bo_init_dmabuf(device, &mem->bo,
3062 pAllocateInfo->allocationSize, fd_info->fd);
3063 if (result == VK_SUCCESS) {
3064 /* take ownership and close the fd */
3065 close(fd_info->fd);
3066 }
3067 } else if (mem->vk.ahardware_buffer) {
3068 #if DETECT_OS_ANDROID
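/* Import the AHardwareBuffer's backing dma-buf; lseek(SEEK_END) on the
 * first fd of the native handle gives the buffer size.
 */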
3069 const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
3070 assert(handle->numFds > 0);
3071 size_t size = lseek(handle->data[0], 0, SEEK_END);
3072 result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
3073 #else
3074 result = VK_ERROR_FEATURE_NOT_PRESENT;
3075 #endif
3076 } else {
3077 uint64_t client_address = 0;
3078 BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
3079
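/* Buffer-device-address capture/replay: when the app supplies an opaque
 * capture address, or asks for capture/replay support, mark the BO
 * replayable so the allocation can be recreated at the same GPU address
 * on replay.
 */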
3080 const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
3081 vk_find_struct_const(pAllocateInfo->pNext,
3082 MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
3083 if (replay_info && replay_info->opaqueCaptureAddress) {
3084 client_address = replay_info->opaqueCaptureAddress;
3085 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
3086 }
3087
3088 const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
3089 pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
3090 if (flags_info &&
3091 (flags_info->flags &
3092 VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
3093 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
3094 }
3095
3096 const VkExportMemoryAllocateInfo *export_info =
3097 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3098 if (export_info && (export_info->handleTypes &
3099 (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
3100 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
3101 alloc_flags |= TU_BO_ALLOC_SHAREABLE;
3102
3103
3104 char name[64] = "vkAllocateMemory()";
3105 if (device->bo_sizes)
3106 snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
3107 (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
3108 VkMemoryPropertyFlags mem_property =
3109 device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
3110 result = tu_bo_init_new_explicit_iova(
3111 device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
3112 client_address, mem_property, alloc_flags, name);
3113 }
3114
3115 if (result == VK_SUCCESS) {
3116 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3117 if (mem_heap_used > mem_heap->size) {
3118 p_atomic_add(&mem_heap->used, -mem->bo->size);
3119 tu_bo_finish(device, mem->bo);
3120 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3121 "Out of heap memory");
3122 }
3123 }
3124
3125 if (result != VK_SUCCESS) {
3126 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3127 return result;
3128 }
3129
3130 /* Track in the device whether our BO list contains any implicit-sync BOs, so
3131 * we can suppress implicit sync on non-WSI usage.
3132 */
3133 const struct wsi_memory_allocate_info *wsi_info =
3134 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3135 if (wsi_info && wsi_info->implicit_sync) {
3136 mtx_lock(&device->bo_mutex);
3137 if (!mem->bo->implicit_sync) {
3138 mem->bo->implicit_sync = true;
3139 device->implicit_sync_bo_count++;
3140 }
3141 mtx_unlock(&device->bo_mutex);
3142 }
3143
3144 const VkMemoryDedicatedAllocateInfo *dedicate_info =
3145 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3146 if (dedicate_info) {
3147 mem->image = tu_image_from_handle(dedicate_info->image);
3148 } else {
3149 mem->image = NULL;
3150 }
3151
3152 TU_RMV(heap_create, device, pAllocateInfo, mem);
3153
3154 *pMem = tu_device_memory_to_handle(mem);
3155
3156 return VK_SUCCESS;
3157 }
3158
3159 VKAPI_ATTR void VKAPI_CALL
3160 tu_FreeMemory(VkDevice _device,
3161 VkDeviceMemory _mem,
3162 const VkAllocationCallbacks *pAllocator)
3163 {
3164 VK_FROM_HANDLE(tu_device, device, _device);
3165 VK_FROM_HANDLE(tu_device_memory, mem, _mem);
3166
3167 if (mem == NULL)
3168 return;
3169
3170 TU_RMV(resource_destroy, device, mem);
3171
3172 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
3173 tu_bo_finish(device, mem->bo);
3174 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3175 }
3176
3177 VKAPI_ATTR VkResult VKAPI_CALL
3178 tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
3179 {
3180 VK_FROM_HANDLE(tu_device, device, _device);
3181 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
3182 VkResult result;
3183
3184 if (mem == NULL) {
3185 *ppData = NULL;
3186 return VK_SUCCESS;
3187 }
3188
3189 void *placed_addr = NULL;
3190 if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
3191 const VkMemoryMapPlacedInfoEXT *placed_info =
3192 vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
3193 assert(placed_info != NULL);
3194 placed_addr = placed_info->pPlacedAddress;
3195 }
3196
3197 result = tu_bo_map(device, mem->bo, placed_addr);
3198 if (result != VK_SUCCESS)
3199 return result;
3200
3201 *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
3202 return VK_SUCCESS;
3203 }
3204
3205 VKAPI_ATTR VkResult VKAPI_CALL
3206 tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
3207 {
3208 VK_FROM_HANDLE(tu_device, device, _device);
3209 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);
3210
3211 if (mem == NULL)
3212 return VK_SUCCESS;
3213
3214 return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
3215 }
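
/* CPU cache maintenance for mappings of cached, non-coherent memory:
 * flushing cleans the CPU cache before the GPU reads, invalidating discards
 * stale lines after the GPU writes. If the device exposes no cached
 * non-coherent memory type this degenerates to a no-op.
 */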
3216 static VkResult
3217 sync_cache(VkDevice _device,
3218 enum tu_mem_sync_op op,
3219 uint32_t count,
3220 const VkMappedMemoryRange *ranges)
3221 {
3222 VK_FROM_HANDLE(tu_device, device, _device);
3223
3224 if (!device->physical_device->has_cached_non_coherent_memory) {
3225 tu_finishme(
3226 "data cache clean and invalidation are unsupported on this arch!");
3227 return VK_SUCCESS;
3228 }
3229
3230 for (uint32_t i = 0; i < count; i++) {
3231 VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
3232 tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
3233 }
3234
3235 return VK_SUCCESS;
3236 }
3237
3238 VkResult
3239 tu_FlushMappedMemoryRanges(VkDevice _device,
3240 uint32_t memoryRangeCount,
3241 const VkMappedMemoryRange *pMemoryRanges)
3242 {
3243 return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
3244 pMemoryRanges);
3245 }
3246
3247 VkResult
3248 tu_InvalidateMappedMemoryRanges(VkDevice _device,
3249 uint32_t memoryRangeCount,
3250 const VkMappedMemoryRange *pMemoryRanges)
3251 {
3252 return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
3253 pMemoryRanges);
3254 }
3255
3256 VKAPI_ATTR void VKAPI_CALL
3257 tu_GetDeviceMemoryCommitment(VkDevice device,
3258 VkDeviceMemory memory,
3259 VkDeviceSize *pCommittedMemoryInBytes)
3260 {
3261 *pCommittedMemoryInBytes = 0;
3262 }
3263
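/* VkFramebuffer objects are only consumed by the render-pass path; under
 * TU_DEBUG(DYNAMIC) they are handled entirely by the common runtime
 * instead. Imageless framebuffers store no attachment array.
 */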
3264 VKAPI_ATTR VkResult VKAPI_CALL
3265 tu_CreateFramebuffer(VkDevice _device,
3266 const VkFramebufferCreateInfo *pCreateInfo,
3267 const VkAllocationCallbacks *pAllocator,
3268 VkFramebuffer *pFramebuffer)
3269 {
3270 VK_FROM_HANDLE(tu_device, device, _device);
3271
3272 if (TU_DEBUG(DYNAMIC))
3273 return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
3274 pFramebuffer);
3275
3276 VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
3277 struct tu_framebuffer *framebuffer;
3278
3279 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3280
3281 bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
3282
3283 size_t size = sizeof(*framebuffer);
3284 if (!imageless)
3285 size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
3286 framebuffer = (struct tu_framebuffer *) vk_object_alloc(
3287 &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
3288 if (framebuffer == NULL)
3289 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3290
3291 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3292 framebuffer->width = pCreateInfo->width;
3293 framebuffer->height = pCreateInfo->height;
3294 framebuffer->layers = pCreateInfo->layers;
3295
3296 if (!imageless) {
3297 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3298 VkImageView _iview = pCreateInfo->pAttachments[i];
3299 struct tu_image_view *iview = tu_image_view_from_handle(_iview);
3300 framebuffer->attachments[i].attachment = iview;
3301 }
3302 }
3303
3304 tu_framebuffer_tiling_config(framebuffer, device, pass);
3305
3306 *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
3307 return VK_SUCCESS;
3308 }
3309
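/* For dynamic rendering, fill in the command buffer's internal framebuffer
 * from the VkRenderingInfo. Width/height include the render-area offset so
 * the tiling configuration covers the whole addressed region.
 */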
3310 void
3311 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
3312 const VkRenderingInfo *pRenderingInfo)
3313 {
3314 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
3315 struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
3316
3317 framebuffer->attachment_count = pass->attachment_count;
3318 framebuffer->width = pRenderingInfo->renderArea.offset.x +
3319 pRenderingInfo->renderArea.extent.width;
3320 framebuffer->height = pRenderingInfo->renderArea.offset.y +
3321 pRenderingInfo->renderArea.extent.height;
3322 framebuffer->layers = pRenderingInfo->layerCount;
3323
3324 tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
3325 }
3326
3327 VKAPI_ATTR void VKAPI_CALL
3328 tu_DestroyFramebuffer(VkDevice _device,
3329 VkFramebuffer _fb,
3330 const VkAllocationCallbacks *pAllocator)
3331 {
3332 VK_FROM_HANDLE(tu_device, device, _device);
3333
3334 if (TU_DEBUG(DYNAMIC)) {
3335 vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
3336 return;
3337 }
3338
3339 VK_FROM_HANDLE(tu_framebuffer, fb, _fb);
3340
3341 if (!fb)
3342 return;
3343
3344 vk_object_free(&device->vk, pAllocator, fb);
3345 }
3346
3347 VKAPI_ATTR VkResult VKAPI_CALL
3348 tu_GetMemoryFdKHR(VkDevice _device,
3349 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3350 int *pFd)
3351 {
3352 VK_FROM_HANDLE(tu_device, device, _device);
3353 VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
3354
3355 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3356
3357 /* At the moment, we support only the below handle types. */
3358 assert(pGetFdInfo->handleType ==
3359 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3360 pGetFdInfo->handleType ==
3361 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3362
3363 int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
3364 if (prime_fd < 0)
3365 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3366
3367 *pFd = prime_fd;
3368
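/* For dedicated image allocations, record the image's DRM format modifier
 * as BO metadata so consumers of the exported dma-buf can recover the
 * tiling/UBWC layout.
 */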
3369 if (memory->image) {
3370 struct fdl_layout *l = &memory->image->layout[0];
3371 uint64_t modifier;
3372 if (l->ubwc) {
3373 modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
3374 } else if (l->tile_mode == 2) {
3375 modifier = DRM_FORMAT_MOD_QCOM_TILED2;
3376 } else if (l->tile_mode == 3) {
3377 modifier = DRM_FORMAT_MOD_QCOM_TILED3;
3378 } else {
3379 assert(!l->tile_mode);
3380 modifier = DRM_FORMAT_MOD_LINEAR;
3381 }
3382 struct fdl_metadata metadata = {
3383 .modifier = modifier,
3384 };
3385 tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
3386 }
3387
3388 return VK_SUCCESS;
3389 }
3390
3391 VKAPI_ATTR VkResult VKAPI_CALL
3392 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
3393 VkExternalMemoryHandleTypeFlagBits handleType,
3394 int fd,
3395 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3396 {
3397 VK_FROM_HANDLE(tu_device, device, _device);
3398 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3399 pMemoryFdProperties->memoryTypeBits =
3400 (1 << device->physical_device->memory.type_count) - 1;
3401 return VK_SUCCESS;
3402 }
3403
3404 VKAPI_ATTR void VKAPI_CALL
3405 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
3406 VkPhysicalDevice physicalDevice,
3407 VkSampleCountFlagBits samples,
3408 VkMultisamplePropertiesEXT* pMultisampleProperties)
3409 {
3410 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
3411
3412 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
3413 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
3414 else
3415 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
3416 }
3417
3418 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3419 VkDevice device,
3420 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3421 {
3422 VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
3423 return mem->bo->iova;
3424 }
3425
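/* Optional per-name BO accounting. When the device's bo_sizes hash table
 * exists (it is only created when BO debugging is enabled), every BO
 * allocation and free updates a count and total size keyed by allocation
 * name, and tu_debug_bos_print_stats() dumps the totals.
 */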
3426 struct tu_debug_bos_entry {
3427 uint32_t count;
3428 uint64_t size;
3429 const char *name;
3430 };
3431
3432 const char *
3433 tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
3434 {
3435 assert(name);
3436
3437 if (likely(!dev->bo_sizes))
3438 return NULL;
3439
3440 mtx_lock(&dev->bo_mutex);
3441 struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
3442 struct tu_debug_bos_entry *debug_bos;
3443
3444 if (!entry) {
3445 debug_bos = (struct tu_debug_bos_entry *) calloc(
3446 1, sizeof(struct tu_debug_bos_entry));
3447 debug_bos->name = strdup(name);
3448 _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
3449 } else {
3450 debug_bos = (struct tu_debug_bos_entry *) entry->data;
3451 }
3452
3453 debug_bos->count++;
3454 debug_bos->size += align(size, 4096);
3455 mtx_unlock(&dev->bo_mutex);
3456
3457 return debug_bos->name;
3458 }
3459
3460 void
3461 tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
3462 {
3463 if (likely(!dev->bo_sizes) || !bo->name)
3464 return;
3465
3466 mtx_lock(&dev->bo_mutex);
3467 struct hash_entry *entry =
3468 _mesa_hash_table_search(dev->bo_sizes, bo->name);
3469 /* If we're finishing the BO, it should have been added already */
3470 assert(entry);
3471
3472 struct tu_debug_bos_entry *debug_bos =
3473 (struct tu_debug_bos_entry *) entry->data;
3474 debug_bos->count--;
3475 debug_bos->size -= align(bo->size, 4096);
3476 if (!debug_bos->count) {
3477 _mesa_hash_table_remove(dev->bo_sizes, entry);
3478 free((void *) debug_bos->name);
3479 free(debug_bos);
3480 }
3481 mtx_unlock(&dev->bo_mutex);
3482 }
3483
3484 static int debug_bos_count_compare(const void *in_a, const void *in_b)
3485 {
3486 struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
3487 struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
3488 return a->count - b->count;
3489 }
3490
3491 void
3492 tu_debug_bos_print_stats(struct tu_device *dev)
3493 {
3494 if (likely(!dev->bo_sizes))
3495 return;
3496
3497 mtx_lock(&dev->bo_mutex);
3498
3499 /* Put the HT's sizes data in an array so we can sort by number of allocations. */
3500 struct util_dynarray dyn;
3501 util_dynarray_init(&dyn, NULL);
3502
3503 uint32_t size = 0;
3504 uint32_t count = 0;
3505 hash_table_foreach(dev->bo_sizes, entry)
3506 {
3507 struct tu_debug_bos_entry *debug_bos =
3508 (struct tu_debug_bos_entry *) entry->data;
3509 util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
3510 size += debug_bos->size / 1024;
3511 count += debug_bos->count;
3512 }
3513
3514 qsort(dyn.data,
3515 util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
3516 sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);
3517
3518 util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
3519 {
3520 struct tu_debug_bos_entry *debug_bos = *entryp;
3521 mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
3522 (long long) (debug_bos->size / 1024));
3523 }
3524
3525 mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
3526
3527 util_dynarray_fini(&dyn);
3528
3529 mtx_unlock(&dev->bo_mutex);
3530 }
3531
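/* When command-stream dumping (FD_RD_DUMP) is enabled, every BO is also
 * tracked in dump_bo_list so it can be written out alongside the command
 * stream. Removal swaps the last element into the freed slot to keep the
 * array dense without shifting.
 */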
3532 void
3533 tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo)
3534 {
3535 bo->dump_bo_list_idx = ~0;
3536
3537 if (!FD_RD_DUMP(ENABLE))
3538 return;
3539
3540 mtx_lock(&dev->bo_mutex);
3541 uint32_t idx =
3542 util_dynarray_num_elements(&dev->dump_bo_list, struct tu_bo *);
3543 bo->dump_bo_list_idx = idx;
3544 util_dynarray_append(&dev->dump_bo_list, struct tu_bo *, bo);
3545 mtx_unlock(&dev->bo_mutex);
3546 }
3547
3548 void
3549 tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo)
3550 {
3551 if (bo->dump_bo_list_idx != ~0) {
3552 mtx_lock(&dev->bo_mutex);
3553 struct tu_bo *exchanging_bo =
3554 util_dynarray_pop(&dev->dump_bo_list, struct tu_bo *);
3555 *util_dynarray_element(&dev->dump_bo_list, struct tu_bo *,
3556 bo->dump_bo_list_idx) = exchanging_bo;
3557 exchanging_bo->dump_bo_list_idx = bo->dump_bo_list_idx;
3558 mtx_unlock(&dev->bo_mutex);
3559 }
3560 }
3561
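/* Debug-utils labels are forwarded to the common implementation and, in
 * addition, emitted as u_trace annotation ranges so they show up in
 * perfetto traces; labels inside a render pass go into the draw CS,
 * others into the main CS.
 */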
3562 void
3563 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
3564 const VkDebugUtilsLabelEXT *pLabelInfo)
3565 {
3566 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3567
3568 vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
3569
3570 /* Note that the spec says:
3571 *
3572 * "An application may open a debug label region in one command buffer and
3573 * close it in another, or otherwise split debug label regions across
3574 * multiple command buffers or multiple queue submissions. When viewed
3575 * from the linear series of submissions to a single queue, the calls to
3576 * vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
3577 * matched and balanced."
3578 *
3579 * But if you're beginning labeling during a renderpass and ending outside
3580 * it, or vice versa, these trace ranges in perfetto will be unbalanced. I
3581 * expect that u_trace and perfetto will do something like take just one of
3582 * the begins/ends, or drop the event entirely, but not crash. Similarly,
3583 * I think we'll have problems if the tracepoints are split across cmd
3584 * buffers. Still, getting the simple case of cmd buffer annotation into
3585 * perfetto should prove useful.
3586 */
3587 const char *label = pLabelInfo->pLabelName;
3588 if (cmd_buffer->state.pass) {
3589 trace_start_cmd_buffer_annotation_rp(
3590 &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
3591 } else {
3592 trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
3593 strlen(label), label);
3594 }
3595 }
3596
3597 void
3598 tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
3599 {
3600 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3601
3602 if (cmd_buffer->vk.labels.size > 0) {
3603 if (cmd_buffer->state.pass) {
3604 trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
3605 &cmd_buffer->draw_cs);
3606 } else {
3607 trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
3608 }
3609 }
3610
3611 vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
3612 }
3613