/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_android.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_queue.h"
#include "tu_query_pool.h"
#include "tu_rmv.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if DETECT_OS_ANDROID
#include "util/u_gralloc/u_gralloc.h"
#include <vndk/hardware_buffer.h>
#endif

uint64_t os_page_size = 4096;

static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

#define TU_API_VERSION VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = TU_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
#endif
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2 = true,
#endif
   .KHR_get_physical_device_properties2 = true,
#ifdef TU_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
#ifdef TU_USE_WSI_PLATFORM
   .EXT_surface_maintenance1 = true,
   .EXT_swapchain_colorspace = true,
#endif
} };

static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_8bit_storage = device->info->a7xx.storage_8bit,
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .KHR_compute_shader_derivatives = device->info->chip >= 7,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
#ifdef TU_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query for
       * present wait/id, but the feature is useful enough that we expose it
       * behind an opt-in mechanism for now. If the instance only enables
       * surface extensions that unconditionally support present wait, we can
       * also expose the extensions that way. */
      .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                         wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
      .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                           wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
#endif
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = true,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_image_copy = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
#ifdef TU_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .NV_compute_shader_derivatives = device->info->chip >= 7,
      .VALVE_mutable_descriptor_type = true,
   } };

#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = true;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess = false;
   features->storagePushConstant16 = false;
   features->storageInputOutput16 = false;
   features->multiview = true;
   features->multiviewGeometryShader = false;
   features->multiviewTessellationShader = false;
   features->variablePointersStorageBuffer = true;
   features->variablePointers = true;
   features->protectedMemory = false;
   features->samplerYcbcrConversion = true;
   features->shaderDrawParameters = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge = true;
   features->drawIndirectCount = true;
   features->storageBuffer8BitAccess = pdevice->info->a7xx.storage_8bit;
   features->uniformAndStorageBuffer8BitAccess = false;
   features->storagePushConstant8 = false;
   features->shaderBufferInt64Atomics =
      pdevice->info->a7xx.has_64b_ssbo_atomics;
   features->shaderSharedInt64Atomics = false;
   features->shaderFloat16 = true;
   features->shaderInt8 = true;

   features->descriptorIndexing = true;
   features->shaderInputAttachmentArrayDynamicIndexing = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing = true;
   features->shaderUniformBufferArrayNonUniformIndexing = true;
   features->shaderSampledImageArrayNonUniformIndexing = true;
   features->shaderStorageBufferArrayNonUniformIndexing = true;
   features->shaderStorageImageArrayNonUniformIndexing = true;
   features->shaderInputAttachmentArrayNonUniformIndexing = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   features->descriptorBindingUniformBufferUpdateAfterBind = true;
   features->descriptorBindingSampledImageUpdateAfterBind = true;
   features->descriptorBindingStorageImageUpdateAfterBind = true;
   features->descriptorBindingStorageBufferUpdateAfterBind = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending = true;
   features->descriptorBindingPartiallyBound = true;
   features->descriptorBindingVariableDescriptorCount = true;
   features->runtimeDescriptorArray = true;

   features->samplerFilterMinmax =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout = true;
   features->imagelessFramebuffer = true;
   features->uniformBufferStandardLayout = true;
   features->shaderSubgroupExtendedTypes = true;
   features->separateDepthStencilLayouts = true;
   features->hostQueryReset = true;
   features->timelineSemaphore = true;
   features->bufferDeviceAddress = true;
   features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice = false;
   features->vulkanMemoryModel = true;
   features->vulkanMemoryModelDeviceScope = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex = true;
   features->shaderOutputLayer = true;
   features->subgroupBroadcastDynamicId = true;

   /* Vulkan 1.3 */
   features->robustImageAccess = true;
   features->inlineUniformBlock = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl = true;
   features->privateData = true;
   features->shaderDemoteToHelperInvocation = true;
   features->shaderTerminateInvocation = true;
   features->subgroupSizeControl = true;
   features->computeFullSubgroups = true;
   features->synchronization2 = true;
   features->textureCompressionASTC_HDR = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering = true;
   features->shaderIntegerDotProduct = true;
   features->maintenance4 = true;

   /* Vulkan 1.4 */
   features->pushDescriptor = true;

   /* VK_KHR_compute_shader_derivatives */
   features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
   features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;

   /* VK_KHR_dynamic_rendering_local_read */
   features->dynamicRenderingLocalRead = true;

   /* VK_KHR_fragment_shading_rate */
   features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
   features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
   features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;

   /* VK_KHR_index_type_uint8 */
   features->indexTypeUint8 = true;

   /* VK_KHR_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_maintenance6 */
   features->maintenance6 = true;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;

   /* VK_KHR_shader_float_controls2 */
   features->shaderFloatControls2 = true;

   /* VK_KHR_shader_subgroup_uniform_control_flow */
   features->shaderSubgroupUniformControlFlow = true;

   /* VK_KHR_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_KHR_workgroup_memory_explicit_layout */
   features->workgroupMemoryExplicitLayout = true;
   features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
   features->workgroupMemoryExplicitLayout8BitAccess = true;
   features->workgroupMemoryExplicitLayout16BitAccess = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_attachment_feedback_loop_dynamic_state */
   features->attachmentFeedbackLoopDynamicState = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_depth_clamp_zero_one */
   features->depthClampZeroOne = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_device_address_binding_report */
   features->reportAddressBinding = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode = false;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable =
      pdevice->info->a6xx.has_sample_locations;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_EXT_host_image_copy */
   features->hostImageCopy = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_legacy_vertex_attributes */
   features->legacyVertexAttributes = true;

   /* VK_EXT_legacy_dithering */
   features->legacyDithering = true;

   /* VK_EXT_map_memory_placed */
   features->memoryMapPlaced = true;
   features->memoryMapRangePlaced = false;
   features->memoryUnmapReserve = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_nested_command_buffer */
   features->nestedCommandBuffer = true;
   features->nestedCommandBufferRendering = true;
   features->nestedCommandBufferSimultaneousUse = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_pipeline_robustness */
   features->pipelineRobustness = true;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_shader_replicated_composites */
   features->shaderReplicatedComposites = true;

#ifdef TU_USE_WSI_PLATFORM
   /* VK_EXT_swapchain_maintenance1 */
   features->swapchainMaintenance1 = true;
#endif

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_KHR_shader_relaxed_extended_instruction */
   features->shaderRelaxedExtendedInstruction = true;

   /* VK_KHR_subgroup_rotate */
   features->shaderSubgroupRotate = true;
   features->shaderSubgroupRotateClustered = true;
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

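   /* threadsize_base is the base wave width; parts that support the double
    * threadsize run a full subgroup at twice that width, which is what we
    * advertise here.
    */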
   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;

}

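/* Upper bound on descriptor counts: the per-set size in bytes divided by the
 * byte size of a single A6XX_TEX_CONST_DWORDS-dword descriptor.
 */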
static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   if (pdevice->info->chip >= 7) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 4,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 2,
         .subminor = 7,
         .patch = 1,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16 = true;
   p->shaderDenormPreserveFloat16 = false;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = false;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = false;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = false;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = false;
   p->shaderDenormPreserveFloat64 = false;
   p->shaderRoundingModeRTEFloat64 = false;
   p->shaderRoundingModeRTZFloat64 = false;
   p->shaderSignedZeroInfNanPreserveFloat64 = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative = true;
   p->shaderSampledImageArrayNonUniformIndexingNative = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = false;
   p->quadDivergentImplicitLod = false;

   p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;

   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone = false;
   p->independentResolve = false;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->minSubgroupSize = pdevice->info->threadsize_base;
   p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

/* CP_ALWAYS_ON_COUNTER is fixed 19.2 MHz */
#define ALWAYS_ON_FREQUENCY 19200000

static void
tu_get_properties(struct tu_physical_device *pdevice,
                  struct vk_properties *props)
{
   /* Limits */
   props->maxImageDimension1D = (1 << 14);
   props->maxImageDimension2D = (1 << 14);
   props->maxImageDimension3D = (1 << 11);
   props->maxImageDimensionCube = (1 << 14);
   props->maxImageArrayLayers = (1 << 11);
   props->maxTexelBufferElements = 128 * 1024 * 1024;
   props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
   props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
   props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
   props->maxMemoryAllocationCount = UINT32_MAX;
   props->maxSamplerAllocationCount = 64 * 1024;
   props->bufferImageGranularity = 64; /* A cache line */
   props->sparseAddressSpaceSize = 0;
   props->maxBoundDescriptorSets = pdevice->usable_sets;
   props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
   props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
   props->maxPerStageDescriptorInputAttachments = MAX_RTS;
   props->maxPerStageResources = max_descriptor_set_size;
   props->maxDescriptorSetSamplers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
   props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   props->maxDescriptorSetSampledImages = max_descriptor_set_size;
   props->maxDescriptorSetStorageImages = max_descriptor_set_size;
   props->maxDescriptorSetInputAttachments = MAX_RTS;
   props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputAttributeOffset = 4095;
   props->maxVertexInputBindingStride = 2048;
   props->maxVertexOutputComponents = 128;
   props->maxTessellationGenerationLevel = 64;
   props->maxTessellationPatchSize = 32;
   props->maxTessellationControlPerVertexInputComponents = 128;
   props->maxTessellationControlPerVertexOutputComponents = 128;
   props->maxTessellationControlPerPatchOutputComponents = 120;
   props->maxTessellationControlTotalOutputComponents = 4096;
   props->maxTessellationEvaluationInputComponents = 128;
   props->maxTessellationEvaluationOutputComponents = 128;
   props->maxGeometryShaderInvocations = 32;
   props->maxGeometryInputComponents = 64;
   props->maxGeometryOutputComponents = 128;
   props->maxGeometryOutputVertices = 256;
   props->maxGeometryTotalOutputComponents = 1024;
   props->maxFragmentInputComponents = 124;
   props->maxFragmentOutputAttachments = 8;
   props->maxFragmentDualSrcAttachments = 1;
   props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
   props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
   props->maxComputeWorkGroupCount[0] =
      props->maxComputeWorkGroupCount[1] =
      props->maxComputeWorkGroupCount[2] = 65535;
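   /* The largest workgroup fills every wave slot with a full-width wave:
    * max_waves times the (possibly doubled) thread size.
    */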
   props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
      pdevice->info->threadsize_base * pdevice->info->max_waves;
   props->maxComputeWorkGroupSize[0] =
      props->maxComputeWorkGroupSize[1] =
      props->maxComputeWorkGroupSize[2] = 1024;
   props->subPixelPrecisionBits = 8;
   props->subTexelPrecisionBits = 8;
   props->mipmapPrecisionBits = 8;
   props->maxDrawIndexedIndexValue = UINT32_MAX;
   props->maxDrawIndirectCount = UINT32_MAX;
   props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
   props->maxSamplerAnisotropy = 16;
   props->maxViewports =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   props->maxViewportDimensions[0] =
      props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
   props->viewportBoundsRange[0] = INT16_MIN;
   props->viewportBoundsRange[1] = INT16_MAX;
   props->viewportSubPixelBits = 8;
   props->minMemoryMapAlignment = 4096; /* A page */
   props->minTexelBufferOffsetAlignment = 64;
   props->minUniformBufferOffsetAlignment = 64;
   props->minStorageBufferOffsetAlignment = 4;
   props->minTexelOffset = -16;
   props->maxTexelOffset = 15;
   props->minTexelGatherOffset = -32;
   props->maxTexelGatherOffset = 31;
   props->minInterpolationOffset = -0.5;
   props->maxInterpolationOffset = 0.4375;
   props->subPixelInterpolationOffsetBits = 4;
   props->maxFramebufferWidth = (1 << 14);
   props->maxFramebufferHeight = (1 << 14);
   props->maxFramebufferLayers = (1 << 10);
   props->framebufferColorSampleCounts = sample_counts;
   props->framebufferDepthSampleCounts = sample_counts;
   props->framebufferStencilSampleCounts = sample_counts;
   props->framebufferNoAttachmentsSampleCounts = sample_counts;
   props->maxColorAttachments = MAX_RTS;
   props->sampledImageColorSampleCounts = sample_counts;
   props->sampledImageIntegerSampleCounts = sample_counts;
   props->sampledImageDepthSampleCounts = sample_counts;
   props->sampledImageStencilSampleCounts = sample_counts;
   props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
   props->maxSampleMaskWords = 1;
   props->timestampComputeAndGraphics = true;
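   /* Nanoseconds per always-on counter tick: 1e9 / 19.2 MHz ~= 52.08 ns. */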
   props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY;
   props->maxClipDistances = 8;
   props->maxCullDistances = 8;
   props->maxCombinedClipAndCullDistances = 8;
   props->discreteQueuePriorities = 2;
   props->pointSizeRange[0] = 1;
   props->pointSizeRange[1] = 4092;
   props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
   props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
   props->pointSizeGranularity = 0.0625;
   props->lineWidthGranularity =
      pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
   props->strictLines = true;
   props->standardSampleLocations = true;
   props->optimalBufferCopyOffsetAlignment = 128;
   props->optimalBufferCopyRowPitchAlignment = 128;
   props->nonCoherentAtomSize = 64;

   props->apiVersion =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
         ((pdevice->info->chip >= 7) ? TU_API_VERSION :
                                       VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION))
         : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
   props->driverVersion = vk_get_driver_version();
   props->vendorID = 0x5143;
   props->deviceID = pdevice->dev_id.chip_id;
   props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;

   /* Vulkan 1.4 */
   props->dynamicRenderingLocalReadDepthStencilAttachments = true;
   props->dynamicRenderingLocalReadMultisampledAttachments = true;

   /* sparse properties */
   props->sparseResidencyStandard2DBlockShape = { 0 };
   props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
   props->sparseResidencyStandard3DBlockShape = { 0 };
   props->sparseResidencyAlignedMipSize = { 0 };
   props->sparseResidencyNonResidentStrict = { 0 };

   strcpy(props->deviceName, pdevice->name);
   memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   tu_get_physical_device_properties_1_1(pdevice, props);
   tu_get_physical_device_properties_1_2(pdevice, props);
   tu_get_physical_device_properties_1_3(pdevice, props);

   /* VK_KHR_compute_shader_derivatives */
   props->meshAndTaskShaderDerivatives = false;

   /* VK_KHR_fragment_shading_rate */
   if (pdevice->info->a6xx.has_attachment_shading_rate) {
      props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
      props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
   } else {
      props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
      props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
   }
   props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
   props->primitiveFragmentShadingRateWithMultipleViewports =
      pdevice->info->a7xx.has_primitive_shading_rate;
   /* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
    * for some reason.
    */
   props->layeredShadingRateAttachments = false;
   props->fragmentShadingRateNonTrivialCombinerOps = true;
   props->maxFragmentSize = {4, 4};
   props->maxFragmentSizeAspectRatio = 4;
   props->maxFragmentShadingRateCoverageSamples = 16;
   props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
   props->fragmentShadingRateWithShaderDepthStencilWrites = true;
   props->fragmentShadingRateWithSampleMask = true;
   /* Has wrong gl_SampleMaskIn[0] values when VK_EXT_post_depth_coverage is used. */
   props->fragmentShadingRateWithShaderSampleMask = false;
   props->fragmentShadingRateWithConservativeRasterization = false;
   props->fragmentShadingRateWithFragmentShaderInterlock = false;
   props->fragmentShadingRateWithCustomSampleLocations = true;
   props->fragmentShadingRateStrictMultiplyCombiner = true;

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts =
      pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
   props->maxSampleLocationGridSize = (VkExtent2D) { 1 , 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;
   props->supportsNonZeroFirstInstance = true;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_pipeline_robustness */
   props->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
   props->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_KHR_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_map_memory_placed */
   os_get_page_size(&os_page_size);
   props->minPlacedMemoryMapAlignment = os_page_size;

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_nested_command_buffer */
   props->maxCommandBufferNestingLevel = UINT32_MAX;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
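   /* Storage buffers may carry extra texture descriptors alongside the base
    * one: a 16-bit view when 16-bit storage is supported but isam.v isn't,
    * and an 8-bit view when 8-bit storage is supported.
    */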
   props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
      COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
      COND(pdevice->info->a7xx.storage_8bit, 1));
   props->robustStorageBufferDescriptorSize =
      props->storageBufferDescriptorSize;
   props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->maxSamplerDescriptorBufferRange = ~0ull;
   props->maxResourceDescriptorBufferRange = ~0ull;
   props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
   props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
   props->descriptorBufferAddressSpaceSize = ~0ull;
   props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;

   /* VK_EXT_legacy_vertex_attributes */
   props->nativeUnalignedPerformance = true;

   /* VK_EXT_fragment_density_map */
   props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
   props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
   props->fragmentDensityInvocations = false;

   /* VK_KHR_maintenance5 */
   props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
   props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
   props->depthStencilSwizzleOneSupport = true;
   props->polygonModePointSize = true;
   props->nonStrictWideLinesUseParallelogram = false;
   props->nonStrictSinglePixelWideLinesUseParallelogram = false;

   /* VK_KHR_maintenance6 */
   props->blockTexelViewCompatibleMultipleLayers = true;
   props->maxCombinedImageSamplerDescriptorCount = 1;
   props->fragmentShadingRateClampCombinerInputs = true;

   /* VK_EXT_host_image_copy */

   /* We don't use the layouts ATM so just report all layouts from
    * extensions that we support as compatible.
    */
   static const VkImageLayout supported_layouts[] = {
      VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
      VK_IMAGE_LAYOUT_PREINITIALIZED,
      VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
      VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
   };

   props->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
   props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
   props->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
   props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);

   /* We're a UMA device, so we can always map every kind of memory */
   props->identicalMemoryTypeRequirements = true;

   {
      struct mesa_sha1 sha1_ctx;
      uint8_t sha1[20];

      _mesa_sha1_init(&sha1_ctx);

      /* Make sure we don't match with other vendors */
      const char *driver = "turnip-v1";
      _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));

      /* Hash in UBWC configuration */
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.highest_bank_bit,
                        sizeof(pdevice->ubwc_config.highest_bank_bit));
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.bank_swizzle_levels,
                        sizeof(pdevice->ubwc_config.bank_swizzle_levels));
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.macrotile_mode,
                        sizeof(pdevice->ubwc_config.macrotile_mode));

      _mesa_sha1_final(&sha1_ctx, sha1);

      memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
   }
}

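/* NULL-terminated list of object types the pipeline cache knows how to
 * deserialize when importing cache data.
 */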
1309 static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
1310 &tu_shader_ops,
1311 &tu_nir_shaders_ops,
1312 NULL,
1313 };
1314
1315 VkResult
tu_physical_device_init(struct tu_physical_device * device,struct tu_instance * instance)1316 tu_physical_device_init(struct tu_physical_device *device,
1317 struct tu_instance *instance)
1318 {
1319 VkResult result = VK_SUCCESS;
1320
1321 const char *fd_name = fd_dev_name(&device->dev_id);
1322 if (!fd_name) {
1323 return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1324 "device (chip_id = %" PRIX64
1325 ", gpu_id = %u) is unsupported",
1326 device->dev_id.chip_id, device->dev_id.gpu_id);
1327 }
1328
1329 if (strncmp(fd_name, "FD", 2) == 0) {
1330 device->name = vk_asprintf(&instance->vk.alloc,
1331 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
1332 "Turnip Adreno (TM) %s", &fd_name[2]);
1333 } else {
1334 device->name = vk_strdup(&instance->vk.alloc, fd_name,
1335 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1336
1337 }
1338 if (!device->name) {
1339 return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1340 "device name alloc fail");
1341 }
1342
1343 const struct fd_dev_info info = fd_dev_info(&device->dev_id);
1344 if (!info.chip) {
1345 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1346 "device %s is unsupported", device->name);
1347 goto fail_free_name;
1348 }
1349 switch (fd_dev_gen(&device->dev_id)) {
1350 case 6:
1351 case 7: {
1352 device->dev_info = info;
1353 device->info = &device->dev_info;
1354 uint32_t depth_cache_size =
1355 device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
1356 uint32_t color_cache_size =
1357 (device->info->num_ccu *
1358 device->info->a6xx.sysmem_per_ccu_color_cache_size);
1359 uint32_t color_cache_size_gmem =
1360 color_cache_size /
1361 (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
1362
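/* Lay out on-chip memory: for sysmem (bypass) rendering the CCU depth cache
 * sits at offset 0 with the color cache right after it, and on a7xx the VPC
 * attribute buffer follows the color cache. For GMEM rendering the VPC
 * attribute buffer (when present) is carved out of the top of GMEM with the
 * CCU color cache placed just below it.
 */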
1363 device->ccu_depth_offset_bypass = 0;
1364 device->ccu_offset_bypass =
1365 device->ccu_depth_offset_bypass + depth_cache_size;
1366
1367 if (device->info->a7xx.has_gmem_vpc_attr_buf) {
1368 device->vpc_attr_buf_size_bypass =
1369 device->info->a7xx.sysmem_vpc_attr_buf_size;
1370 device->vpc_attr_buf_offset_bypass =
1371 device->ccu_offset_bypass + color_cache_size;
1372
1373 device->vpc_attr_buf_size_gmem =
1374 device->info->a7xx.gmem_vpc_attr_buf_size;
1375 device->vpc_attr_buf_offset_gmem =
1376 device->gmem_size -
1377 (device->vpc_attr_buf_size_gmem * device->info->num_ccu);
1378
1379 device->ccu_offset_gmem =
1380 device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;
1381
1382 device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
1383 } else {
1384 device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
1385 device->usable_gmem_size_gmem = device->gmem_size;
1386 }
1387
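/* Optionally set aside the last HW descriptor set for internal driver use;
 * this reduces the number of sets advertised to the application by one.
 */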
1388 if (instance->reserve_descriptor_set) {
1389 device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
1390 } else {
1391 device->usable_sets = device->info->a6xx.max_sets;
1392 device->reserved_set_idx = -1;
1393 }
1394 break;
1395 }
1396 default:
1397 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1398 "device %s is unsupported", device->name);
1399 goto fail_free_name;
1400 }
1401 if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
1402 result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1403 "cannot generate UUID");
1404 goto fail_free_name;
1405 }
1406
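/* Cached non-coherent memory requires manual CPU cache maintenance, so it is
 * only exposed when the L1 data-cache line size is known; it is also left
 * disabled on 32-bit ARM, where the needed userspace cache operations are
 * not readily usable.
 */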
1407 device->level1_dcache_size = tu_get_l1_dcache_size();
1408 device->has_cached_non_coherent_memory =
1409 device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
1410
1411 device->memory.type_count = 1;
1412 device->memory.types[0] =
1413 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1414 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1415 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1416
1417 if (device->has_cached_coherent_memory) {
1418 device->memory.types[device->memory.type_count] =
1419 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1420 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1421 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1422 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1423 device->memory.type_count++;
1424 }
1425
1426 if (device->has_cached_non_coherent_memory) {
1427 device->memory.types[device->memory.type_count] =
1428 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1429 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1430 VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1431 device->memory.type_count++;
1432 }
1433
1434 /* Provide fallback UBWC config values if the kernel doesn't support
1435 * providing them. This should match what the kernel programs.
1436 */
1437 if (!device->ubwc_config.highest_bank_bit) {
1438 device->ubwc_config.highest_bank_bit = info.highest_bank_bit;
1439 }
1440 if (device->ubwc_config.bank_swizzle_levels == ~0) {
1441 device->ubwc_config.bank_swizzle_levels = info.ubwc_swizzle;
1442 }
1443 if (device->ubwc_config.macrotile_mode == FDL_MACROTILE_INVALID) {
1444 device->ubwc_config.macrotile_mode =
1445 (enum fdl_macrotile_mode) info.macrotile_mode;
1446 }
1447
1448 fd_get_driver_uuid(device->driver_uuid);
1449 fd_get_device_uuid(device->device_uuid, &device->dev_id);
1450
1451 struct vk_physical_device_dispatch_table dispatch_table;
1452 vk_physical_device_dispatch_table_from_entrypoints(
1453 &dispatch_table, &tu_physical_device_entrypoints, true);
1454 vk_physical_device_dispatch_table_from_entrypoints(
1455 &dispatch_table, &wsi_physical_device_entrypoints, false);
1456
1457 result = vk_physical_device_init(&device->vk, &instance->vk,
1458 NULL, NULL, NULL, /* We set up extensions later */
1459 &dispatch_table);
1460 if (result != VK_SUCCESS)
1461 goto fail_free_name;
1462
1463 get_device_extensions(device, &device->vk.supported_extensions);
1464 tu_get_features(device, &device->vk.supported_features);
1465 tu_get_properties(device, &device->vk.properties);
1466
1467 device->vk.supported_sync_types = device->sync_types;
1468
1469 #ifdef TU_USE_WSI_PLATFORM
1470 result = tu_wsi_init(device);
1471 if (result != VK_SUCCESS) {
1472 vk_startup_errorf(instance, result, "WSI init failure");
1473 vk_physical_device_finish(&device->vk);
1474 goto fail_free_name;
1475 }
1476 #endif
1477
1478 /* The gpu id is already embedded in the uuid so we just pass "tu"
1479 * when creating the cache.
1480 */
1481 char buf[VK_UUID_SIZE * 2 + 1];
1482 mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1483 device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1484
1485 device->vk.pipeline_cache_import_ops = cache_import_ops;
1486
1487 return VK_SUCCESS;
1488
1489 fail_free_name:
1490 vk_free(&instance->vk.alloc, (void *)device->name);
1491 return result;
1492 }
1493
1494 static void
1495 tu_physical_device_finish(struct tu_physical_device *device)
1496 {
1497 #ifdef TU_USE_WSI_PLATFORM
1498 tu_wsi_finish(device);
1499 #endif
1500
1501 close(device->local_fd);
1502 if (device->master_fd != -1)
1503 close(device->master_fd);
1504
1505 if (device->kgsl_dma_fd != -1)
1506 close(device->kgsl_dma_fd);
1507
1508 disk_cache_destroy(device->vk.disk_cache);
1509 vk_free(&device->instance->vk.alloc, (void *)device->name);
1510
1511 vk_physical_device_finish(&device->vk);
1512 }
1513
1514 static void
1515 tu_destroy_physical_device(struct vk_physical_device *device)
1516 {
1517 tu_physical_device_finish((struct tu_physical_device *) device);
1518 vk_free(&device->instance->alloc, device);
1519 }
1520
1521 static const driOptionDescription tu_dri_options[] = {
1522 DRI_CONF_SECTION_PERFORMANCE
1523 DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1524 DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1525 DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1526 DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1527 DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1528 DRI_CONF_SECTION_END
1529
1530 DRI_CONF_SECTION_DEBUG
1531 DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1532 DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1533 DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1534 DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1535 DRI_CONF_SECTION_END
1536
1537 DRI_CONF_SECTION_MISCELLANEOUS
1538 DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1539 DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1540 DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1541 DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
1542 DRI_CONF_SECTION_END
1543 };
1544
1545 static void
1546 tu_init_dri_options(struct tu_instance *instance)
1547 {
1548 driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1549 ARRAY_SIZE(tu_dri_options));
1550 driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1551 instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1552 instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1553
1554 instance->dont_care_as_load =
1555 driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1556 instance->conservative_lrz =
1557 !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1558 instance->reserve_descriptor_set =
1559 !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1560 instance->allow_oob_indirect_ubo_loads =
1561 driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1562 instance->disable_d24s8_border_color_workaround =
1563 driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
1564 }
1565
1566 static uint32_t instance_count = 0;
1567
1568 VKAPI_ATTR VkResult VKAPI_CALL
1569 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1570 const VkAllocationCallbacks *pAllocator,
1571 VkInstance *pInstance)
1572 {
1573 struct tu_instance *instance;
1574 VkResult result;
1575
1576 tu_env_init();
1577
1578 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1579
1580 if (pAllocator == NULL)
1581 pAllocator = vk_default_allocator();
1582
1583 instance = (struct tu_instance *) vk_zalloc(
1584 pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1585
1586 if (!instance)
1587 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1588
1589 struct vk_instance_dispatch_table dispatch_table;
1590 vk_instance_dispatch_table_from_entrypoints(
1591 &dispatch_table, &tu_instance_entrypoints, true);
1592 vk_instance_dispatch_table_from_entrypoints(
1593 &dispatch_table, &wsi_instance_entrypoints, false);
1594
1595 result = vk_instance_init(&instance->vk,
1596 &tu_instance_extensions_supported,
1597 &dispatch_table,
1598 pCreateInfo, pAllocator);
1599 if (result != VK_SUCCESS) {
1600 vk_free(pAllocator, instance);
1601 return vk_error(NULL, result);
1602 }
1603
1604 instance->vk.physical_devices.try_create_for_drm =
1605 tu_physical_device_try_create;
1606 instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1607 instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1608
1609 instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
1610 if (TU_DEBUG(STARTUP))
1611 mesa_logi("Created an instance");
1612
1613 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1614
1615 tu_init_dri_options(instance);
1616
1617 *pInstance = tu_instance_to_handle(instance);
1618
1619 #ifdef HAVE_PERFETTO
1620 tu_perfetto_init();
1621 #endif
1622
1623 util_gpuvis_init();
1624
1625 return VK_SUCCESS;
1626 }
1627
1628 VKAPI_ATTR void VKAPI_CALL
1629 tu_DestroyInstance(VkInstance _instance,
1630 const VkAllocationCallbacks *pAllocator)
1631 {
1632 VK_FROM_HANDLE(tu_instance, instance, _instance);
1633
1634 if (!instance)
1635 return;
1636
1637 VG(VALGRIND_DESTROY_MEMPOOL(instance));
1638
1639 driDestroyOptionCache(&instance->dri_options);
1640 driDestroyOptionInfo(&instance->available_dri_options);
1641
1642 vk_instance_finish(&instance->vk);
1643 vk_free(&instance->vk.alloc, instance);
1644 }
1645
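/* Turnip exposes a single queue family with one queue that supports
 * graphics, compute and transfer.
 */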
1646 static const VkQueueFamilyProperties tu_queue_family_properties = {
1647 .queueFlags =
1648 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1649 .queueCount = 1,
1650 .timestampValidBits = 48,
1651 .minImageTransferGranularity = { 1, 1, 1 },
1652 };
1653
1654 void
1655 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1656 VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1657 {
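   /* Map however many submit-queue priority levels the kernel exposes
    * (clamped to three) onto the Vulkan LOW/MEDIUM/HIGH global priorities.
    */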
1658 props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1659 switch (props->priorityCount) {
1660 case 1:
1661 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1662 break;
1663 case 2:
1664 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1665 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1666 break;
1667 case 3:
1668 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1669 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1670 props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1671 break;
1672 default:
1673 unreachable("unexpected priority count");
1674 break;
1675 }
1676 }
1677
1678 VKAPI_ATTR void VKAPI_CALL
1679 tu_GetPhysicalDeviceQueueFamilyProperties2(
1680 VkPhysicalDevice physicalDevice,
1681 uint32_t *pQueueFamilyPropertyCount,
1682 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1683 {
1684 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1685
1686 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1687 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1688
1689 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1690 {
1691 p->queueFamilyProperties = tu_queue_family_properties;
1692
1693 vk_foreach_struct(ext, p->pNext) {
1694 switch (ext->sType) {
1695 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1696 VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1697 (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1698 tu_physical_device_get_global_priority_properties(pdevice, props);
1699 break;
1700 }
1701 default:
1702 break;
1703 }
1704 }
1705 }
1706 }
1707
1708 uint64_t
1709 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1710 {
1711 uint64_t total_ram = 0;
1712 ASSERTED bool has_physical_memory =
1713 os_get_total_physical_memory(&total_ram);
1714 assert(has_physical_memory);
1715
1716 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
1717 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1718 */
1719 uint64_t available_ram;
1720 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1721 available_ram = total_ram / 2;
1722 else
1723 available_ram = total_ram * 3 / 4;
1724
1725 if (physical_device->va_size)
1726 available_ram = MIN2(available_ram, physical_device->va_size);
1727
1728 return available_ram;
1729 }
1730
1731 static VkDeviceSize
1732 tu_get_budget_memory(struct tu_physical_device *physical_device)
1733 {
1734 uint64_t heap_size = physical_device->heap.size;
1735 uint64_t heap_used = physical_device->heap.used;
1736 uint64_t sys_available;
1737 ASSERTED bool has_available_memory =
1738 os_get_available_system_memory(&sys_available);
1739 assert(has_available_memory);
1740
1741 if (physical_device->va_size)
1742 sys_available = MIN2(sys_available, physical_device->va_size);
1743
1744 /*
1745 * Let's not incite the app to starve the system: report at most 90% of
1746 * available system memory.
1747 */
1748 uint64_t heap_available = sys_available * 9 / 10;
1749 return MIN2(heap_size, heap_used + heap_available);
1750 }
1751
1752 VKAPI_ATTR void VKAPI_CALL
1753 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1754 VkPhysicalDeviceMemoryProperties2 *props2)
1755 {
1756 VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1757
1758 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1759 props->memoryHeapCount = 1;
1760 props->memoryHeaps[0].size = physical_device->heap.size;
1761 props->memoryHeaps[0].flags = physical_device->heap.flags;
1762
1763 props->memoryTypeCount = physical_device->memory.type_count;
1764 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1765 props->memoryTypes[i] = (VkMemoryType) {
1766 .propertyFlags = physical_device->memory.types[i],
1767 .heapIndex = 0,
1768 };
1769 }
1770
1771 vk_foreach_struct(ext, props2->pNext)
1772 {
1773 switch (ext->sType) {
1774 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1775 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1776 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1777 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1778 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1779
1780 /* The heapBudget and heapUsage values must be zero for array elements
1781 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1782 */
1783 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1784 memory_budget_props->heapBudget[i] = 0u;
1785 memory_budget_props->heapUsage[i] = 0u;
1786 }
1787 break;
1788 }
1789 default:
1790 break;
1791 }
1792 }
1793 }
1794
1795 VKAPI_ATTR VkResult VKAPI_CALL
1796 tu_GetPhysicalDeviceFragmentShadingRatesKHR(
1797 VkPhysicalDevice physicalDevice,
1798 uint32_t *pFragmentShadingRateCount,
1799 VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1800 {
1801 VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1802 pFragmentShadingRates, pFragmentShadingRateCount);
1803
1804 #define append_rate(w, h, s) \
1805 { \
1806 VkPhysicalDeviceFragmentShadingRateKHR rate = { \
1807 .sType = \
1808 VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
1809 .sampleCounts = s, \
1810 .fragmentSize = { .width = w, .height = h }, \
1811 }; \
1812 vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, \
1813 r) *r = rate; \
1814 }
1815
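   /* Coarser shading rates are limited to lower sample counts; the 1x1 rate
    * is reported with ~0 so it is valid for every supported sample count.
    */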
1816 append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
1817 append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
1818 append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1819 append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1820 append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1821 append_rate(1, 1, ~0);
1822
1823 #undef append_rate
1824
1825 return vk_outarray_status(&out);
1826 }
1827
1828 uint64_t
1829 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1830 {
1831 /* This is based on the 19.2MHz always-on rbbm timer.
1832 *
1833 * TODO we should probably query this value from kernel..
1834 */
1835 return ts * (1000000000 / 19200000);
1836 }
1837
1838 struct u_trace_context *
1839 tu_device_get_u_trace(struct tu_device *device)
1840 {
1841 return &device->trace_context;
1842 }
1843
1844 static void*
1845 tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
1846 {
1847 struct tu_device *device =
1848 container_of(utctx, struct tu_device, trace_context);
1849
1850 struct tu_bo *bo;
1851 tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
1852 tu_bo_map(device, bo, NULL);
1853
1854 return bo;
1855 }
1856
1857 static void
1858 tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
1859 {
1860 struct tu_device *device =
1861 container_of(utctx, struct tu_device, trace_context);
1862 struct tu_bo *bo = (struct tu_bo *) timestamps;
1863
1864 tu_bo_finish(device, bo);
1865 }
1866
1867 template <chip CHIP>
1868 static void
1869 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1870 uint64_t offset_B, uint32_t)
1871 {
1872 struct tu_bo *bo = (struct tu_bo *) timestamps;
1873 struct tu_cs *ts_cs = (struct tu_cs *) cs;
1874
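   /* Emit an RB_DONE_TS event that has the CP write a timestamp from the
    * always-on counter into the trace BO; a7xx uses the newer
    * CP_EVENT_WRITE7 encoding for the same thing.
    */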
1875 if (CHIP == A6XX) {
1876 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1877 tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1878 CP_EVENT_WRITE_0_TIMESTAMP);
1879 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1880 tu_cs_emit(ts_cs, 0x00000000);
1881 } else {
1882 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1883 tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1884 .write_src = EV_WRITE_ALWAYSON,
1885 .write_dst = EV_DST_RAM,
1886 .write_enabled = true)
1887 .value);
1888 tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1889 }
1890 }
1891
1892 static uint64_t
1893 tu_trace_read_ts(struct u_trace_context *utctx,
1894 void *timestamps, uint64_t offset_B, void *flush_data)
1895 {
1896 struct tu_device *device =
1897 container_of(utctx, struct tu_device, trace_context);
1898 struct tu_bo *bo = (struct tu_bo *) timestamps;
1899 struct tu_u_trace_submission_data *submission_data =
1900 (struct tu_u_trace_submission_data *) flush_data;
1901
1902 /* Only need to stall on results for the first entry: */
1903 if (offset_B == 0) {
1904 tu_queue_wait_fence(submission_data->queue, submission_data->fence,
1905 1000000000);
1906 }
1907
1908 if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
1909 return U_TRACE_NO_TIMESTAMP;
1910 }
1911
1912 uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
1913
1914 /* Don't translate the no-timestamp marker: */
1915 if (*ts == U_TRACE_NO_TIMESTAMP)
1916 return U_TRACE_NO_TIMESTAMP;
1917
1918 return tu_device_ticks_to_ns(device, *ts);
1919 }
1920
1921 static void
1922 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1923 {
1924 struct tu_device *device =
1925 container_of(utctx, struct tu_device, trace_context);
1926 struct tu_u_trace_submission_data *submission_data =
1927 (struct tu_u_trace_submission_data *) flush_data;
1928
1929 tu_u_trace_submission_data_finish(device, submission_data);
1930 }
1931
1932 void
1933 tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
1934 void *ts_from, uint64_t from_offset_B,
1935 void *ts_to, uint64_t to_offset_B,
1936 uint64_t size_B)
1937 {
1938 struct tu_cs *cs = (struct tu_cs *) cmdstream;
1939 struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1940 struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1941
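   /* CP_MEMCPY takes the copy length in dwords followed by the 64-bit source
    * and destination iovas.
    */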
1942 tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1943 tu_cs_emit(cs, size_B / sizeof(uint32_t));
1944 tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
1945 tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
1946 }
1947
1948 static void
1949 tu_trace_capture_data(struct u_trace *ut,
1950 void *cs,
1951 void *dst_buffer,
1952 uint64_t dst_offset_B,
1953 void *src_buffer,
1954 uint64_t src_offset_B,
1955 uint32_t size_B)
1956 {
1957 if (src_buffer)
1958 tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
1959 dst_offset_B, size_B);
1960 }
1961
1962 static const void *
1963 tu_trace_get_data(struct u_trace_context *utctx,
1964 void *buffer,
1965 uint64_t offset_B,
1966 uint32_t size_B)
1967 {
1968 struct tu_bo *bo = (struct tu_bo *) buffer;
1969 return (char *) bo->map + offset_B;
1970 }
1971
1972 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1973 * that ignore tracepoints at the beginning/end that are part of a
1974 * suspend/resume chain.
1975 */
1976 static struct u_trace_iterator
1977 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1978 {
1979 switch (cmdbuf->state.suspend_resume) {
1980 case SR_IN_PRE_CHAIN:
1981 return cmdbuf->trace_renderpass_end;
1982 case SR_AFTER_PRE_CHAIN:
1983 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1984 return cmdbuf->pre_chain.trace_renderpass_end;
1985 default:
1986 return u_trace_begin_iterator(&cmdbuf->trace);
1987 }
1988 }
1989
1990 static struct u_trace_iterator
1991 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1992 {
1993 switch (cmdbuf->state.suspend_resume) {
1994 case SR_IN_PRE_CHAIN:
1995 return cmdbuf->trace_renderpass_end;
1996 case SR_IN_CHAIN:
1997 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1998 return cmdbuf->trace_renderpass_start;
1999 default:
2000 return u_trace_end_iterator(&cmdbuf->trace);
2001 }
2002 }
2003 VkResult
2004 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
2005 struct u_trace **trace_copy)
2006 {
2007 *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
2008 sizeof(struct tu_cs), 8,
2009 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2010
2011 if (*cs == NULL) {
2012 return VK_ERROR_OUT_OF_HOST_MEMORY;
2013 }
2014
2015 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
2016 list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
2017
2018 tu_cs_begin(*cs);
2019
2020 tu_cs_emit_wfi(*cs);
2021 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
2022
2023 *trace_copy = (struct u_trace *) vk_zalloc(
2024 &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
2025 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2026
2027 if (*trace_copy == NULL) {
2028 return VK_ERROR_OUT_OF_HOST_MEMORY;
2029 }
2030
2031 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
2032 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
2033 tu_cmd_end_iterator(cmdbuf),
2034 *trace_copy, *cs,
2035 tu_copy_buffer);
2036
2037 tu_cs_emit_wfi(*cs);
2038
2039 tu_cs_end(*cs);
2040
2041 return VK_SUCCESS;
2042 }
2043
2044 VkResult
2045 tu_u_trace_submission_data_create(
2046 struct tu_device *device,
2047 struct tu_cmd_buffer **cmd_buffers,
2048 uint32_t cmd_buffer_count,
2049 struct tu_u_trace_submission_data **submission_data)
2050 {
2051 *submission_data = (struct tu_u_trace_submission_data *)
2052 vk_zalloc(&device->vk.alloc,
2053 sizeof(struct tu_u_trace_submission_data), 8,
2054 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2055
2056 if (!(*submission_data)) {
2057 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2058 }
2059
2060 struct tu_u_trace_submission_data *data = *submission_data;
2061
2062 data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
2063 &device->vk.alloc,
2064 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
2065 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2066
2067 if (!data->cmd_trace_data) {
2068 goto fail;
2069 }
2070
2071 data->cmd_buffer_count = cmd_buffer_count;
2072 data->last_buffer_with_tracepoints = -1;
2073
2074 for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
2075 struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
2076
2077 if (!u_trace_has_points(&cmdbuf->trace))
2078 continue;
2079
2080 data->last_buffer_with_tracepoints = i;
2081
2082 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
2083 /* A single command buffer could be submitted several times, but we
2084 * already baked the timestamp iova addresses and trace points are
2085 * single-use. Therefore we have to copy the trace points and create
2086 * a new timestamp buffer on every submit of a reusable command buffer.
2087 */
2088 if (tu_create_copy_timestamp_cs(cmdbuf,
2089 &data->cmd_trace_data[i].timestamp_copy_cs,
2090 &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
2091 goto fail;
2092 }
2093
2094 assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
2095 } else {
2096 data->cmd_trace_data[i].trace = &cmdbuf->trace;
2097 }
2098 }
2099
2100 assert(data->last_buffer_with_tracepoints != -1);
2101
2102 return VK_SUCCESS;
2103
2104 fail:
2105 tu_u_trace_submission_data_finish(device, data);
2106 *submission_data = NULL;
2107
2108 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2109 }
2110
2111 void
2112 tu_u_trace_submission_data_finish(
2113 struct tu_device *device,
2114 struct tu_u_trace_submission_data *submission_data)
2115 {
2116 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
2117 /* Only free the trace if we had to create a copy of it */
2118 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
2119 if (cmd_data->timestamp_copy_cs) {
2120 tu_cs_finish(cmd_data->timestamp_copy_cs);
2121 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
2122
2123 u_trace_fini(cmd_data->trace);
2124 vk_free(&device->vk.alloc, cmd_data->trace);
2125 }
2126 }
2127
2128 if (submission_data->kgsl_timestamp_bo.bo) {
2129 mtx_lock(&device->kgsl_profiling_mutex);
2130 tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
2131 &submission_data->kgsl_timestamp_bo);
2132 mtx_unlock(&device->kgsl_profiling_mutex);
2133 }
2134
2135 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
2136 vk_free(&device->vk.alloc, submission_data);
2137 }
2138
2139 enum tu_reg_stomper_flags
2140 {
2141 TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
2142 TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
2143 TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
2144 };
2145
2146 /* See freedreno.rst for usage tips */
2147 static const struct debug_named_value tu_reg_stomper_options[] = {
2148 { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
2149 "By default the range specifies the regs to stomp, with 'inverse' it "
2150 "specifies the regs NOT to stomp" },
2151 { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
2152 "Stomp regs at the start of a cmdbuf" },
2153 { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
2154 "Stomp regs before a renderpass" },
2155 { NULL, 0 }
2156 };
2157
2158 template <chip CHIP>
2159 static inline void
2160 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
2161 bool is_rp_blit,
2162 uint32_t first_reg,
2163 uint32_t last_reg,
2164 bool inverse)
2165 {
2166 const uint16_t *regs = NULL;
2167 size_t count = 0;
2168
2169 if (is_rp_blit) {
2170 regs = &RP_BLIT_REGS<CHIP>[0];
2171 count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
2172 } else {
2173 regs = &CMD_REGS<CHIP>[0];
2174 count = ARRAY_SIZE(CMD_REGS<CHIP>);
2175 }
2176
2177 for (size_t i = 0; i < count; i++) {
2178 if (inverse) {
2179 if (regs[i] >= first_reg && regs[i] <= last_reg)
2180 continue;
2181 } else {
2182 if (regs[i] < first_reg || regs[i] > last_reg)
2183 continue;
2184 }
2185
2186 if (fd_reg_stomp_allowed(CHIP, regs[i]))
2187 tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
2188 }
2189 }
2190
2191 static void
2192 tu_init_dbg_reg_stomper(struct tu_device *device)
2193 {
2194 const char *stale_reg_range_str =
2195 os_get_option("TU_DEBUG_STALE_REGS_RANGE");
2196 if (!stale_reg_range_str)
2197 return;
2198
2199 uint32_t first_reg, last_reg;
2200
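   /* The range is two comma-separated hex register offsets, e.g.
    * TU_DEBUG_STALE_REGS_RANGE=0x8600,0x8e00 (example values only).
    */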
2201 if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
2202 mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
2203 return;
2204 }
2205
2206 uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
2207 tu_reg_stomper_options,
2208 TU_DEBUG_REG_STOMP_CMDBUF);
2209
2210 bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
2211
2212 if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) {
2213 struct tu_cs *cmdbuf_cs =
2214 (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2215 tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
2216 "cmdbuf reg stomp cs");
2217 tu_cs_begin(cmdbuf_cs);
2218
2219 TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
2220 tu_cs_end(cmdbuf_cs);
2221 device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
2222 }
2223
2224 if (debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) {
2225 struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2226 tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
2227 tu_cs_begin(rp_cs);
2228
2229 TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
2230 tu_cs_end(rp_cs);
2231
2232 device->dbg_renderpass_stomp_cs = rp_cs;
2233 }
2234 }
2235
2236 /* It is unknown what this workaround is for and what it fixes. */
2237 static VkResult
2238 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
2239 {
2240 struct tu_cs shader_cs;
2241 tu_cs_begin_sub_stream(&device->sub_cs, 10, &shader_cs);
2242
2243 uint32_t raw_shader[] = {
2244 0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2245 0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2246 0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2247 0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2248 0x00000000, 0x03000000, // end
2249 };
2250
2251 tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2252 struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(&device->sub_cs, &shader_cs);
2253 uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2254
2255 struct tu_cs sub_cs;
2256 tu_cs_begin_sub_stream(&device->sub_cs, 47, &sub_cs);
2257
2258 tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2259 .vs_state = true, .hs_state = true, .ds_state = true,
2260 .gs_state = true, .fs_state = true, .gfx_ibo = true,
2261 .cs_bindless = 0xff, .gfx_bindless = 0xff));
2262 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2263 .constlen = 4,
2264 .enabled = true));
2265 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2266 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2267 .threadmode = MULTI,
2268 .threadsize = THREAD128,
2269 .mergedregs = true));
2270 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2271 tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2272 HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2273 HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2274 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2275 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2276 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2277 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
2278 .linearlocalidregid = regid(63, 0),
2279 .threadsize = THREAD128,
2280 .workgrouprastorderzfirsten = true,
2281 .wgtilewidth = 4,
2282 .wgtileheight = 17));
2283 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2284 .wgidconstid = regid(51, 3),
2285 .wgsizeconstid = regid(48, 0),
2286 .wgoffsetconstid = regid(63, 0),
2287 .localidregid = regid(63, 0)));
2288 tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2289 .linearlocalidregid = regid(63, 0),
2290 .threadsize = THREAD128,
2291 .workitemrastorder = WORKITEMRASTORDER_TILED));
2292 tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2293
2294 tu_cs_emit_regs(&sub_cs,
2295 HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2296 .localsizex = 255,
2297 .localsizey = 1,
2298 .localsizez = 1),
2299 HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2300 HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2301 HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2302 HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2303 HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2304 HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2305 tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2306 .localsizex = 255,
2307 .localsizey = 0,
2308 .localsizez = 0));
2309 tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2310 tu_cs_emit(&sub_cs, 0);
2311 tu_cs_emit_qw(&sub_cs, shader_iova);
2312
2313 tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2314 tu_cs_emit(&sub_cs, 0x00000000);
2315 tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2316 tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2317 tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2318
2319 device->cmdbuf_start_a725_quirk_entry =
2320 tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2321
2322 return VK_SUCCESS;
2323 }
2324
2325 static VkResult
2326 tu_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2327 {
2328 struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
2329 const int ret = tu_device_get_gpu_timestamp(dev, timestamp);
2330 return ret == 0 ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2331 }
2332
2333 VKAPI_ATTR VkResult VKAPI_CALL
2334 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2335 const VkDeviceCreateInfo *pCreateInfo,
2336 const VkAllocationCallbacks *pAllocator,
2337 VkDevice *pDevice)
2338 {
2339 VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2340 VkResult result;
2341 struct tu_device *device;
2342 bool border_color_without_format = false;
2343
2344 vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2345 switch (ext->sType) {
2346 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2347 border_color_without_format =
2348 ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2349 ->customBorderColorWithoutFormat;
2350 break;
2351 default:
2352 break;
2353 }
2354 }
2355
2356 device = (struct tu_device *) vk_zalloc2(
2357 &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2358 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2359 if (!device)
2360 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2361
2362 struct vk_device_dispatch_table dispatch_table;
2363 bool override_initial_entrypoints = true;
2364
2365 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2366 vk_device_dispatch_table_from_entrypoints(
2367 &dispatch_table, &tu_rmv_device_entrypoints, true);
2368 override_initial_entrypoints = false;
2369 }
2370
2371 vk_device_dispatch_table_from_entrypoints(
2372 &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2373
2374 switch (fd_dev_gen(&physical_device->dev_id)) {
2375 case 6:
2376 vk_device_dispatch_table_from_entrypoints(
2377 &dispatch_table, &tu_device_entrypoints_a6xx, false);
2378 break;
2379 case 7:
2380 vk_device_dispatch_table_from_entrypoints(
2381 &dispatch_table, &tu_device_entrypoints_a7xx, false);
2382 }
2383
2384 vk_device_dispatch_table_from_entrypoints(
2385 &dispatch_table, &wsi_device_entrypoints, false);
2386
2387 const struct vk_device_entrypoint_table *knl_device_entrypoints =
2388 physical_device->instance->knl->device_entrypoints;
2389 if (knl_device_entrypoints) {
2390 vk_device_dispatch_table_from_entrypoints(
2391 &dispatch_table, knl_device_entrypoints, false);
2392 }
2393
2394 result = vk_device_init(&device->vk, &physical_device->vk,
2395 &dispatch_table, pCreateInfo, pAllocator);
2396 if (result != VK_SUCCESS) {
2397 vk_free(&device->vk.alloc, device);
2398 return vk_startup_errorf(physical_device->instance, result,
2399 "vk_device_init failed");
2400 }
2401
2402 device->instance = physical_device->instance;
2403 device->physical_device = physical_device;
2404 device->device_idx = device->physical_device->device_count++;
2405
2406 result = tu_drm_device_init(device);
2407 if (result != VK_SUCCESS) {
2408 vk_free(&device->vk.alloc, device);
2409 return result;
2410 }
2411
2412 device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2413 device->vk.check_status = tu_device_check_status;
2414 device->vk.get_timestamp = tu_device_get_timestamp;
2415
2416 mtx_init(&device->bo_mutex, mtx_plain);
2417 mtx_init(&device->pipeline_mutex, mtx_plain);
2418 mtx_init(&device->autotune_mutex, mtx_plain);
2419 mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2420 u_rwlock_init(&device->dma_bo_lock);
2421 pthread_mutex_init(&device->submit_mutex, NULL);
2422
2423 if (physical_device->has_set_iova) {
2424 mtx_init(&device->vma_mutex, mtx_plain);
2425 util_vma_heap_init(&device->vma, physical_device->va_start,
2426 ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2427 }
2428
2429 if (TU_DEBUG(BOS))
2430 device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2431
2432 if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2433 tu_memory_trace_init(device);
2434
2435 /* kgsl is not a drm device: */
2436 if (!is_kgsl(physical_device->instance))
2437 vk_device_set_drm_fd(&device->vk, device->fd);
2438
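/* The global BO holds driver-internal state shared by all command buffers:
 * the conditional-rendering predicate, query bookkeeping, debug counters and
 * the border color table.
 */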
2439 struct tu6_global *global = NULL;
2440 uint32_t global_size = sizeof(struct tu6_global);
2441 struct vk_pipeline_cache_create_info pcc_info = { };
2442
2443 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2444 const VkDeviceQueueCreateInfo *queue_create =
2445 &pCreateInfo->pQueueCreateInfos[i];
2446 uint32_t qfi = queue_create->queueFamilyIndex;
2447 device->queues[qfi] = (struct tu_queue *) vk_alloc(
2448 &device->vk.alloc,
2449 queue_create->queueCount * sizeof(struct tu_queue), 8,
2450 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2451 if (!device->queues[qfi]) {
2452 result = vk_startup_errorf(physical_device->instance,
2453 VK_ERROR_OUT_OF_HOST_MEMORY,
2454 "OOM");
2455 goto fail_queues;
2456 }
2457
2458 memset(device->queues[qfi], 0,
2459 queue_create->queueCount * sizeof(struct tu_queue));
2460
2461 device->queue_count[qfi] = queue_create->queueCount;
2462
2463 for (unsigned q = 0; q < queue_create->queueCount; q++) {
2464 result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create);
2465 if (result != VK_SUCCESS) {
2466 device->queue_count[qfi] = q;
2467 goto fail_queues;
2468 }
2469 }
2470 }
2471
2472 {
2473 struct ir3_compiler_options ir3_options = {
2474 .push_ubo_with_preamble = true,
2475 .disable_cache = true,
2476 .bindless_fb_read_descriptor = -1,
2477 .bindless_fb_read_slot = -1,
2478 .storage_16bit = physical_device->info->a6xx.storage_16bit,
2479 .storage_8bit = physical_device->info->a7xx.storage_8bit,
2480 .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2481 };
2482 device->compiler = ir3_compiler_create(
2483 NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2484 }
2485 if (!device->compiler) {
2486 result = vk_startup_errorf(physical_device->instance,
2487 VK_ERROR_INITIALIZATION_FAILED,
2488 "failed to initialize ir3 compiler");
2489 goto fail_queues;
2490 }
2491
2492 /* Initialize sparse array for refcounting imported BOs */
2493 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2494
2495 if (physical_device->has_set_iova) {
2496 STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2497 if (!u_vector_init(&device->zombie_vmas, 64,
2498 sizeof(struct tu_zombie_vma))) {
2499 result = vk_startup_errorf(physical_device->instance,
2500 VK_ERROR_INITIALIZATION_FAILED,
2501 "zombie_vmas create failed");
2502 goto fail_free_zombie_vma;
2503 }
2504 }
2505
2506 /* initial sizes, these will increase if there is overflow */
2507 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2508 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2509
2510 if (device->vk.enabled_features.customBorderColors)
2511 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2512
2513 tu_bo_suballocator_init(
2514 &device->pipeline_suballoc, device, 128 * 1024,
2515 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
2516 TU_BO_ALLOC_ALLOW_DUMP |
2517 TU_BO_ALLOC_INTERNAL_RESOURCE),
2518 "pipeline_suballoc");
2519 tu_bo_suballocator_init(&device->autotune_suballoc, device,
2520 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2521 "autotune_suballoc");
2522 if (is_kgsl(physical_device->instance)) {
2523 tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2524 128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2525 "kgsl_profiling_suballoc");
2526 }
2527
2528 result = tu_bo_init_new(
2529 device, NULL, &device->global_bo, global_size,
2530 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
2531 TU_BO_ALLOC_INTERNAL_RESOURCE),
2532 "global");
2533 if (result != VK_SUCCESS) {
2534 vk_startup_errorf(device->instance, result, "BO init");
2535 goto fail_global_bo;
2536 }
2537
2538 result = tu_bo_map(device, device->global_bo, NULL);
2539 if (result != VK_SUCCESS) {
2540 vk_startup_errorf(device->instance, result, "BO map");
2541 goto fail_global_bo_map;
2542 }
2543
2544 global = (struct tu6_global *)device->global_bo->map;
2545 device->global_bo_map = global;
2546 tu_init_clear_blit_shaders(device);
2547
2548 result = tu_init_empty_shaders(device);
2549 if (result != VK_SUCCESS) {
2550 vk_startup_errorf(device->instance, result, "empty shaders");
2551 goto fail_empty_shaders;
2552 }
2553
2554 global->predicate = 0;
2555 global->vtx_stats_query_not_running = 1;
2556 global->dbg_one = (uint32_t)-1;
2557 global->dbg_gmem_total_loads = 0;
2558 global->dbg_gmem_taken_loads = 0;
2559 global->dbg_gmem_total_stores = 0;
2560 global->dbg_gmem_taken_stores = 0;
2561 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2562 VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2563 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2564 vk_border_color_is_int((VkBorderColor) i));
2565 }
2566
2567 /* initialize to ones so ffs can be used to find unused slots */
2568 BITSET_ONES(device->custom_border_color);
2569
2570 result = tu_init_dynamic_rendering(device);
2571 if (result != VK_SUCCESS) {
2572 vk_startup_errorf(device->instance, result, "dynamic rendering");
2573 goto fail_dynamic_rendering;
2574 }
2575
2576 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2577 NULL);
2578 if (!device->mem_cache) {
2579 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2580 vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2581 goto fail_pipeline_cache;
2582 }
2583
2584 tu_cs_init(&device->sub_cs, device, TU_CS_MODE_SUB_STREAM, 1024, "device sub cs");
2585
2586 if (device->vk.enabled_features.performanceCounterQueryPools) {
2587 /* Prepare command streams setting pass index to the PERF_CNTRS_REG
2588 * from 0 to 31. One of these will be picked up at cmd submit time
2589 * when the perf query is executed.
2590 */
2591
2592 device->perfcntrs_pass_cs_entries =
2593 (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2594 if (!device->perfcntrs_pass_cs_entries) {
2595 result = vk_startup_errorf(device->instance,
2596 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2597 goto fail_perfcntrs_pass_entries_alloc;
2598 }
2599
2600 for (unsigned i = 0; i < 32; i++) {
2601 struct tu_cs sub_cs;
2602
2603 result = tu_cs_begin_sub_stream(&device->sub_cs, 3, &sub_cs);
2604 if (result != VK_SUCCESS) {
2605 vk_startup_errorf(device->instance, result,
2606 "failed to allocate commands streams");
2607 goto fail_prepare_perfcntrs_pass_cs;
2608 }
2609
2610 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2611 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2612
2613 device->perfcntrs_pass_cs_entries[i] =
2614 tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2615 }
2616 }
2617
2618 result = tu_init_bin_preamble(device);
2619 if (result != VK_SUCCESS)
2620 goto fail_bin_preamble;
2621
2622 if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2623 result = tu_init_cmdbuf_start_a725_quirk(device);
2624 if (result != VK_SUCCESS)
2625 goto fail_a725_workaround;
2626 }
2627
2628 tu_init_dbg_reg_stomper(device);
2629
2630 /* Initialize a condition variable for timeline semaphore */
2631 pthread_condattr_t condattr;
2632 if (pthread_condattr_init(&condattr) != 0) {
2633 result = vk_startup_errorf(physical_device->instance,
2634 VK_ERROR_INITIALIZATION_FAILED,
2635 "pthread condattr init");
2636 goto fail_timeline_cond;
2637 }
2638 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2639 pthread_condattr_destroy(&condattr);
2640 result = vk_startup_errorf(physical_device->instance,
2641 VK_ERROR_INITIALIZATION_FAILED,
2642 "pthread condattr clock setup");
2643 goto fail_timeline_cond;
2644 }
2645 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2646 pthread_condattr_destroy(&condattr);
2647 result = vk_startup_errorf(physical_device->instance,
2648 VK_ERROR_INITIALIZATION_FAILED,
2649 "pthread cond init");
2650 goto fail_timeline_cond;
2651 }
2652 pthread_condattr_destroy(&condattr);
2653
2654 result = tu_autotune_init(&device->autotune, device);
2655 if (result != VK_SUCCESS) {
2656 goto fail_timeline_cond;
2657 }
2658
2659 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2660 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2661
2662 mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2663 mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2664
2665 mtx_init(&device->mutex, mtx_plain);
2666
2667 device->use_z24uint_s8uint =
2668 physical_device->info->a6xx.has_z24uint_s8uint &&
2669 (!border_color_without_format ||
2670 physical_device->instance->disable_d24s8_border_color_workaround);
2671 device->use_lrz = !TU_DEBUG(NOLRZ);
2672
2673 tu_gpu_tracepoint_config_variable();
2674
2675 device->submit_count = 0;
2676 u_trace_context_init(&device->trace_context, device,
2677 sizeof(uint64_t),
2678 12,
2679 tu_trace_create_buffer,
2680 tu_trace_destroy_buffer,
2681 TU_CALLX(device, tu_trace_record_ts),
2682 tu_trace_read_ts,
2683 tu_trace_capture_data,
2684 tu_trace_get_data,
2685 tu_trace_delete_flush_data);
2686
2687 tu_breadcrumbs_init(device);
2688
2689 if (FD_RD_DUMP(ENABLE)) {
2690 struct vk_app_info *app_info = &device->instance->vk.app_info;
2691 const char *app_name_str = app_info->app_name ?
2692 app_info->app_name : util_get_process_name();
2693 const char *engine_name_str = app_info->engine_name ?
2694 app_info->engine_name : "unknown-engine";
2695
2696 char app_name[64];
2697 snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2698
2699 char engine_name[32];
2700 snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2701
2702 char output_name[128];
2703 snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
2704 app_name, engine_name, device->instance->instance_idx,
2705 device->device_idx);
2706
2707 fd_rd_output_init(&device->rd_output, output_name);
2708 }
2709
2710 *pDevice = tu_device_to_handle(device);
2711 return VK_SUCCESS;
2712
2713 fail_timeline_cond:
2714 fail_a725_workaround:
2715 fail_bin_preamble:
2716 fail_prepare_perfcntrs_pass_cs:
2717 free(device->perfcntrs_pass_cs_entries);
2718 fail_perfcntrs_pass_entries_alloc:
2719 tu_cs_finish(&device->sub_cs);
2720 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2721 fail_pipeline_cache:
2722 tu_destroy_dynamic_rendering(device);
2723 fail_dynamic_rendering:
2724 tu_destroy_empty_shaders(device);
2725 fail_empty_shaders:
2726 tu_destroy_clear_blit_shaders(device);
2727 fail_global_bo_map:
2728 TU_RMV(resource_destroy, device, device->global_bo);
2729 tu_bo_finish(device, device->global_bo);
2730 vk_free(&device->vk.alloc, device->submit_bo_list);
2731 util_dynarray_fini(&device->dump_bo_list);
2732 fail_global_bo:
2733 ir3_compiler_destroy(device->compiler);
2734 util_sparse_array_finish(&device->bo_map);
2735 if (physical_device->has_set_iova)
2736 util_vma_heap_finish(&device->vma);
2737 fail_free_zombie_vma:
2738 u_vector_finish(&device->zombie_vmas);
2739 fail_queues:
2740 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2741 for (unsigned q = 0; q < device->queue_count[i]; q++)
2742 tu_queue_finish(&device->queues[i][q]);
2743 if (device->queues[i])
2744 vk_free(&device->vk.alloc, device->queues[i]);
2745 }
2746
2747 u_rwlock_destroy(&device->dma_bo_lock);
2748 tu_drm_device_finish(device);
2749 vk_device_finish(&device->vk);
2750 vk_free(&device->vk.alloc, device);
2751 return result;
2752 }
2753
2754 VKAPI_ATTR void VKAPI_CALL
2755 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2756 {
2757 VK_FROM_HANDLE(tu_device, device, _device);
2758
2759 if (!device)
2760 return;
2761
2762 tu_memory_trace_finish(device);
2763
2764 if (FD_RD_DUMP(ENABLE))
2765 fd_rd_output_fini(&device->rd_output);
2766
2767 tu_breadcrumbs_finish(device);
2768
2769 u_trace_context_fini(&device->trace_context);
2770
2771 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2772 if (device->scratch_bos[i].initialized)
2773 tu_bo_finish(device, device->scratch_bos[i].bo);
2774 }
2775
2776 if (device->fiber_pvtmem_bo.bo)
2777 tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2778
2779 if (device->wave_pvtmem_bo.bo)
2780 tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2781
2782 tu_destroy_clear_blit_shaders(device);
2783
2784 tu_destroy_empty_shaders(device);
2785
2786 tu_destroy_dynamic_rendering(device);
2787
2788 ir3_compiler_destroy(device->compiler);
2789
2790 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2791
2792 tu_cs_finish(&device->sub_cs);
2793
2794 if (device->perfcntrs_pass_cs_entries) {
2795 free(device->perfcntrs_pass_cs_entries);
2796 }
2797
2798 if (device->dbg_cmdbuf_stomp_cs) {
2799 tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2800 free(device->dbg_cmdbuf_stomp_cs);
2801 }
2802
2803 if (device->dbg_renderpass_stomp_cs) {
2804 tu_cs_finish(device->dbg_renderpass_stomp_cs);
2805 free(device->dbg_renderpass_stomp_cs);
2806 }
2807
2808 tu_autotune_fini(&device->autotune, device);
2809
2810 tu_bo_suballocator_finish(&device->pipeline_suballoc);
2811 tu_bo_suballocator_finish(&device->autotune_suballoc);
2812 tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2813
2814 tu_bo_finish(device, device->global_bo);
2815
2816 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2817 for (unsigned q = 0; q < device->queue_count[i]; q++)
2818 tu_queue_finish(&device->queues[i][q]);
2819 if (device->queue_count[i])
2820 vk_free(&device->vk.alloc, device->queues[i]);
2821 }
2822
2823 tu_drm_device_finish(device);
2824
2825 if (device->physical_device->has_set_iova)
2826 util_vma_heap_finish(&device->vma);
2827
2828 util_sparse_array_finish(&device->bo_map);
2829 u_rwlock_destroy(&device->dma_bo_lock);
2830
2831 u_vector_finish(&device->zombie_vmas);
2832
2833 pthread_cond_destroy(&device->timeline_cond);
2834 _mesa_hash_table_destroy(device->bo_sizes, NULL);
2835 vk_free(&device->vk.alloc, device->submit_bo_list);
2836 util_dynarray_fini(&device->dump_bo_list);
2837 vk_device_finish(&device->vk);
2838 vk_free(&device->vk.alloc, device);
2839 }
2840
2841 VkResult
2842 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2843 {
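   /* Scratch BOs live in power-of-two size buckets; any already-initialized
    * bucket at least as large as the request can be returned without taking
    * a lock.
    */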
2844 unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2845 unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2846 assert(index < ARRAY_SIZE(dev->scratch_bos));
2847
2848 for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2849 if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2850 /* Fast path: just return the already-allocated BO. */
2851 *bo = dev->scratch_bos[i].bo;
2852 return VK_SUCCESS;
2853 }
2854 }
2855
2856 /* Slow path: actually allocate the BO. We take a lock because the process
2857 * of allocating it is slow, and we don't want to block the CPU while it
2858 * finishes.
2859 */
2860 mtx_lock(&dev->scratch_bos[index].construct_mtx);
2861
2862 /* Another thread may have allocated it already while we were waiting on
2863 * the lock. We need to check this in order to avoid double-allocating.
2864 */
2865 if (dev->scratch_bos[index].initialized) {
2866 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2867 *bo = dev->scratch_bos[index].bo;
2868 return VK_SUCCESS;
2869 }
2870
2871 unsigned bo_size = 1ull << size_log2;
2872 VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
2873 TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
2874 if (result != VK_SUCCESS) {
2875 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2876 return result;
2877 }
2878
2879 p_atomic_set(&dev->scratch_bos[index].initialized, true);
2880
2881 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2882
2883 *bo = dev->scratch_bos[index].bo;
2884 return VK_SUCCESS;
2885 }
2886
2887 VKAPI_ATTR VkResult VKAPI_CALL
2888 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2889 VkLayerProperties *pProperties)
2890 {
2891 *pPropertyCount = 0;
2892 return VK_SUCCESS;
2893 }
2894
2895 VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char * pLayerName,uint32_t * pPropertyCount,VkExtensionProperties * pProperties)2896 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2897 uint32_t *pPropertyCount,
2898 VkExtensionProperties *pProperties)
2899 {
2900 if (pLayerName)
2901 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2902
2903 return vk_enumerate_instance_extension_properties(
2904 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2905 }
2906
2907 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance,const char * pName)2908 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2909 {
2910 VK_FROM_HANDLE(tu_instance, instance, _instance);
2911 return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
2912 &tu_instance_entrypoints,
2913 pName);
2914 }
2915
2916 /* The loader wants us to expose a second GetInstanceProcAddr function
2917 * to work around certain LD_PRELOAD issues seen in apps.
2918 */
2919 PUBLIC
2920 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance,const char * pName)2921 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2922 {
2923 return tu_GetInstanceProcAddr(instance, pName);
2924 }
2925
2926 VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,const VkMemoryAllocateInfo * pAllocateInfo,const VkAllocationCallbacks * pAllocator,VkDeviceMemory * pMem)2927 tu_AllocateMemory(VkDevice _device,
2928 const VkMemoryAllocateInfo *pAllocateInfo,
2929 const VkAllocationCallbacks *pAllocator,
2930 VkDeviceMemory *pMem)
2931 {
2932 VK_FROM_HANDLE(tu_device, device, _device);
2933 struct tu_device_memory *mem;
2934 VkResult result;
2935
2936 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2937
2938 struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2939 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2940 if (mem_heap_used > mem_heap->size)
2941 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2942
2943 mem = (struct tu_device_memory *) vk_device_memory_create(
2944 &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
2945 if (mem == NULL)
2946 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2947
2948 if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
2949 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2950 /* Apparently, this is allowed */
2951 *pMem = VK_NULL_HANDLE;
2952 return VK_SUCCESS;
2953 }
2954
2955 const VkImportMemoryFdInfoKHR *fd_info =
2956 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2957
2958 if (fd_info && fd_info->handleType) {
2959 assert(fd_info->handleType ==
2960 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2961 fd_info->handleType ==
2962 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2963
2964 /*
2965 * TODO Importing the same fd twice gives us the same handle without
2966 * reference counting. We need to maintain a per-instance handle-to-bo
2967 * table and add a reference count to tu_bo.
2968 */
2969 result = tu_bo_init_dmabuf(device, &mem->bo,
2970 pAllocateInfo->allocationSize, fd_info->fd);
2971 if (result == VK_SUCCESS) {
2972 /* take ownership and close the fd */
2973 close(fd_info->fd);
2974 }
2975 } else if (mem->vk.ahardware_buffer) {
2976 #if DETECT_OS_ANDROID
2977 const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
2978 assert(handle->numFds > 0);
2979 size_t size = lseek(handle->data[0], 0, SEEK_END);
2980 result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
2981 #else
2982 result = VK_ERROR_FEATURE_NOT_PRESENT;
2983 #endif
2984 } else {
2985 uint64_t client_address = 0;
2986 BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
2987
2988 const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
2989 vk_find_struct_const(pAllocateInfo->pNext,
2990 MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
2991 if (replay_info && replay_info->opaqueCaptureAddress) {
2992 client_address = replay_info->opaqueCaptureAddress;
2993 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2994 }
2995
2996 const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
2997 pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
2998 if (flags_info &&
2999 (flags_info->flags &
3000 VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
3001 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
3002 }
3003
3004 const VkExportMemoryAllocateInfo *export_info =
3005 vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3006 if (export_info && (export_info->handleTypes &
3007 (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
3008 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
3009 alloc_flags |= TU_BO_ALLOC_SHAREABLE;
3010
3011
3012 char name[64] = "vkAllocateMemory()";
3013 if (device->bo_sizes)
3014 snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
3015 (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
3016 VkMemoryPropertyFlags mem_property =
3017 device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
3018 result = tu_bo_init_new_explicit_iova(
3019 device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
3020 client_address, mem_property, alloc_flags, name);
3021 }
3022
3023 if (result == VK_SUCCESS) {
3024 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3025 if (mem_heap_used > mem_heap->size) {
3026 p_atomic_add(&mem_heap->used, -mem->bo->size);
3027 tu_bo_finish(device, mem->bo);
3028 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3029 "Out of heap memory");
3030 }
3031 }
3032
3033 if (result != VK_SUCCESS) {
3034 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3035 return result;
3036 }
3037
3038 /* Track in the device whether our BO list contains any implicit-sync BOs, so
3039 * we can suppress implicit sync on non-WSI usage.
3040 */
3041 const struct wsi_memory_allocate_info *wsi_info =
3042 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3043 if (wsi_info && wsi_info->implicit_sync) {
3044 mtx_lock(&device->bo_mutex);
3045 if (!mem->bo->implicit_sync) {
3046 mem->bo->implicit_sync = true;
3047 device->implicit_sync_bo_count++;
3048 }
3049 mtx_unlock(&device->bo_mutex);
3050 }
3051
3052 const VkMemoryDedicatedAllocateInfo *dedicate_info =
3053 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3054 if (dedicate_info) {
3055 mem->image = tu_image_from_handle(dedicate_info->image);
3056 } else {
3057 mem->image = NULL;
3058 }
3059
3060 TU_RMV(heap_create, device, pAllocateInfo, mem);
3061
3062 *pMem = tu_device_memory_to_handle(mem);
3063
3064 return VK_SUCCESS;
3065 }
3066
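/* Illustrative only: an application that wants the exportable-BO path above
 * (TU_BO_ALLOC_SHAREABLE) chains VkExportMemoryAllocateInfo into the
 * allocation; `device` and `mem_type_index` are placeholders.
 *
 *    VkExportMemoryAllocateInfo export_info = {
 *       .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO,
 *       .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    VkMemoryAllocateInfo alloc_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
 *       .pNext = &export_info,
 *       .allocationSize = 1024 * 1024,
 *       .memoryTypeIndex = mem_type_index,
 *    };
 *    VkDeviceMemory memory;
 *    VkResult res = vkAllocateMemory(device, &alloc_info, NULL, &memory);
 */
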
3067 VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,VkDeviceMemory _mem,const VkAllocationCallbacks * pAllocator)3068 tu_FreeMemory(VkDevice _device,
3069 VkDeviceMemory _mem,
3070 const VkAllocationCallbacks *pAllocator)
3071 {
3072 VK_FROM_HANDLE(tu_device, device, _device);
3073 VK_FROM_HANDLE(tu_device_memory, mem, _mem);
3074
3075 if (mem == NULL)
3076 return;
3077
3078 TU_RMV(resource_destroy, device, mem);
3079
3080 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
3081 tu_bo_finish(device, mem->bo);
3082 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3083 }
3084
3085 VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory2KHR(VkDevice _device,const VkMemoryMapInfoKHR * pMemoryMapInfo,void ** ppData)3086 tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
3087 {
3088 VK_FROM_HANDLE(tu_device, device, _device);
3089 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
3090 VkResult result;
3091
3092 if (mem == NULL) {
3093 *ppData = NULL;
3094 return VK_SUCCESS;
3095 }
3096
3097 void *placed_addr = NULL;
3098 if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
3099 const VkMemoryMapPlacedInfoEXT *placed_info =
3100 vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
3101 assert(placed_info != NULL);
3102 placed_addr = placed_info->pPlacedAddress;
3103 }
3104
3105 result = tu_bo_map(device, mem->bo, placed_addr);
3106 if (result != VK_SUCCESS)
3107 return result;
3108
3109 *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
3110 return VK_SUCCESS;
3111 }
3112
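/* Illustrative only: mapping at a caller-chosen address (the placed path
 * handled above, from VK_EXT_map_memory_placed) looks roughly like this on
 * the application side; `device`, `memory` and the suitably aligned `addr`
 * are placeholders.
 *
 *    VkMemoryMapPlacedInfoEXT placed_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_MAP_PLACED_INFO_EXT,
 *       .pPlacedAddress = addr,
 *    };
 *    VkMemoryMapInfoKHR map_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_MAP_INFO_KHR,
 *       .pNext = &placed_info,
 *       .flags = VK_MEMORY_MAP_PLACED_BIT_EXT,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    void *data;
 *    VkResult res = vkMapMemory2KHR(device, &map_info, &data);
 */
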
3113 VKAPI_ATTR VkResult VKAPI_CALL
tu_UnmapMemory2KHR(VkDevice _device,const VkMemoryUnmapInfoKHR * pMemoryUnmapInfo)3114 tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
3115 {
3116 VK_FROM_HANDLE(tu_device, device, _device);
3117 VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);
3118
3119 if (mem == NULL)
3120 return VK_SUCCESS;
3121
3122 return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
3123 }
3124 static VkResult
sync_cache(VkDevice _device,enum tu_mem_sync_op op,uint32_t count,const VkMappedMemoryRange * ranges)3125 sync_cache(VkDevice _device,
3126 enum tu_mem_sync_op op,
3127 uint32_t count,
3128 const VkMappedMemoryRange *ranges)
3129 {
3130 VK_FROM_HANDLE(tu_device, device, _device);
3131
3132 if (!device->physical_device->has_cached_non_coherent_memory) {
3133 tu_finishme(
3134 "data cache clean and invalidation are unsupported on this arch!");
3135 return VK_SUCCESS;
3136 }
3137
3138 for (uint32_t i = 0; i < count; i++) {
3139 VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
3140 tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
3141 }
3142
3143 return VK_SUCCESS;
3144 }
3145
3146 VkResult
tu_FlushMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)3147 tu_FlushMappedMemoryRanges(VkDevice _device,
3148 uint32_t memoryRangeCount,
3149 const VkMappedMemoryRange *pMemoryRanges)
3150 {
3151 return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
3152 pMemoryRanges);
3153 }
3154
3155 VkResult
tu_InvalidateMappedMemoryRanges(VkDevice _device,uint32_t memoryRangeCount,const VkMappedMemoryRange * pMemoryRanges)3156 tu_InvalidateMappedMemoryRanges(VkDevice _device,
3157 uint32_t memoryRangeCount,
3158 const VkMappedMemoryRange *pMemoryRanges)
3159 {
3160 return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
3161 pMemoryRanges);
3162 }
3163
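/* Illustrative only: for memory types without HOST_COHERENT, an application
 * flushes CPU writes before GPU access with a VkMappedMemoryRange, which ends
 * up in sync_cache() above; `device` and `memory` are placeholders.
 *
 *    VkMappedMemoryRange range = {
 *       .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
 *       .memory = memory,
 *       .offset = 0,
 *       .size = VK_WHOLE_SIZE,
 *    };
 *    vkFlushMappedMemoryRanges(device, 1, &range);
 *    // vkInvalidateMappedMemoryRanges() takes the same arguments for the
 *    // GPU-to-CPU direction.
 */
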
3164 VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,VkDeviceMemory memory,VkDeviceSize * pCommittedMemoryInBytes)3165 tu_GetDeviceMemoryCommitment(VkDevice device,
3166 VkDeviceMemory memory,
3167 VkDeviceSize *pCommittedMemoryInBytes)
3168 {
3169 *pCommittedMemoryInBytes = 0;
3170 }
3171
3172 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,const VkFramebufferCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkFramebuffer * pFramebuffer)3173 tu_CreateFramebuffer(VkDevice _device,
3174 const VkFramebufferCreateInfo *pCreateInfo,
3175 const VkAllocationCallbacks *pAllocator,
3176 VkFramebuffer *pFramebuffer)
3177 {
3178 VK_FROM_HANDLE(tu_device, device, _device);
3179
3180 if (TU_DEBUG(DYNAMIC))
3181 return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
3182 pFramebuffer);
3183
3184 VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
3185 struct tu_framebuffer *framebuffer;
3186
3187 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3188
3189 bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
3190
3191 size_t size = sizeof(*framebuffer);
3192 if (!imageless)
3193 size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
3194 framebuffer = (struct tu_framebuffer *) vk_object_alloc(
3195 &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
3196 if (framebuffer == NULL)
3197 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3198
3199 framebuffer->attachment_count = pCreateInfo->attachmentCount;
3200 framebuffer->width = pCreateInfo->width;
3201 framebuffer->height = pCreateInfo->height;
3202 framebuffer->layers = pCreateInfo->layers;
3203
3204 if (!imageless) {
3205 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3206 VkImageView _iview = pCreateInfo->pAttachments[i];
3207 struct tu_image_view *iview = tu_image_view_from_handle(_iview);
3208 framebuffer->attachments[i].attachment = iview;
3209 }
3210 }
3211
3212 tu_framebuffer_tiling_config(framebuffer, device, pass);
3213
3214 *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
3215 return VK_SUCCESS;
3216 }
3217
3218 void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer * cmd_buffer,const VkRenderingInfo * pRenderingInfo)3219 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
3220 const VkRenderingInfo *pRenderingInfo)
3221 {
3222 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
3223 struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
3224
3225 framebuffer->attachment_count = pass->attachment_count;
3226 framebuffer->width = pRenderingInfo->renderArea.offset.x +
3227 pRenderingInfo->renderArea.extent.width;
3228 framebuffer->height = pRenderingInfo->renderArea.offset.y +
3229 pRenderingInfo->renderArea.extent.height;
3230 framebuffer->layers = pRenderingInfo->layerCount;
3231
3232 tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
3233 }
3234
3235 VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,VkFramebuffer _fb,const VkAllocationCallbacks * pAllocator)3236 tu_DestroyFramebuffer(VkDevice _device,
3237 VkFramebuffer _fb,
3238 const VkAllocationCallbacks *pAllocator)
3239 {
3240 VK_FROM_HANDLE(tu_device, device, _device);
3241
3242 if (TU_DEBUG(DYNAMIC)) {
3243 vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
3244 return;
3245 }
3246
3247 VK_FROM_HANDLE(tu_framebuffer, fb, _fb);
3248
3249 if (!fb)
3250 return;
3251
3252 vk_object_free(&device->vk, pAllocator, fb);
3253 }
3254
3255 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,const VkMemoryGetFdInfoKHR * pGetFdInfo,int * pFd)3256 tu_GetMemoryFdKHR(VkDevice _device,
3257 const VkMemoryGetFdInfoKHR *pGetFdInfo,
3258 int *pFd)
3259 {
3260 VK_FROM_HANDLE(tu_device, device, _device);
3261 VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
3262
3263 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3264
3265 /* At the moment, we support only the handle types asserted below. */
3266 assert(pGetFdInfo->handleType ==
3267 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3268 pGetFdInfo->handleType ==
3269 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3270
3271 int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
3272 if (prime_fd < 0)
3273 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3274
3275 *pFd = prime_fd;
3276
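   /* For dedicated image allocations, record the image's DRM format modifier
    * in the BO's kernel metadata so that whoever imports this dma-buf can
    * recover the tiling/UBWC layout.
    */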
3277 if (memory->image) {
3278 struct fdl_layout *l = &memory->image->layout[0];
3279 uint64_t modifier;
3280 if (l->ubwc) {
3281 modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
3282 } else if (l->tile_mode == 2) {
3283 modifier = DRM_FORMAT_MOD_QCOM_TILED2;
3284 } else if (l->tile_mode == 3) {
3285 modifier = DRM_FORMAT_MOD_QCOM_TILED3;
3286 } else {
3287 assert(!l->tile_mode);
3288 modifier = DRM_FORMAT_MOD_LINEAR;
3289 }
3290 struct fdl_metadata metadata = {
3291 .modifier = modifier,
3292 };
3293 tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
3294 }
3295
3296 return VK_SUCCESS;
3297 }
3298
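/* Illustrative only: the export path above is driven by the application via
 * vkGetMemoryFdKHR; the resulting fd is a dma-buf that can be handed to
 * another process or API. `device` and `memory` are placeholders.
 *
 *    VkMemoryGetFdInfoKHR get_fd_info = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR,
 *       .memory = memory,
 *       .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *    };
 *    int fd = -1;
 *    VkResult res = vkGetMemoryFdKHR(device, &get_fd_info, &fd);
 *    // The caller owns the fd and must close() it when done.
 */
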
3299 VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,VkExternalMemoryHandleTypeFlagBits handleType,int fd,VkMemoryFdPropertiesKHR * pMemoryFdProperties)3300 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
3301 VkExternalMemoryHandleTypeFlagBits handleType,
3302 int fd,
3303 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3304 {
3305 VK_FROM_HANDLE(tu_device, device, _device);
3306 assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3307 pMemoryFdProperties->memoryTypeBits =
3308 (1 << device->physical_device->memory.type_count) - 1;
3309 return VK_SUCCESS;
3310 }
3311
3312 VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,VkSampleCountFlagBits samples,VkMultisamplePropertiesEXT * pMultisampleProperties)3313 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
3314 VkPhysicalDevice physicalDevice,
3315 VkSampleCountFlagBits samples,
3316 VkMultisamplePropertiesEXT* pMultisampleProperties)
3317 {
3318 VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
3319
3320 if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
3321 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
3322 else
3323 pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
3324 }
3325
tu_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,const VkDeviceMemoryOpaqueCaptureAddressInfo * pInfo)3326 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3327 VkDevice device,
3328 const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3329 {
3330 VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
3331 return mem->bo->iova;
3332 }
3333
3334 struct tu_debug_bos_entry {
3335 uint32_t count;
3336 uint64_t size;
3337 const char *name;
3338 };
3339
3340 const char *
tu_debug_bos_add(struct tu_device * dev,uint64_t size,const char * name)3341 tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
3342 {
3343 assert(name);
3344
3345 if (likely(!dev->bo_sizes))
3346 return NULL;
3347
3348 mtx_lock(&dev->bo_mutex);
3349 struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
3350 struct tu_debug_bos_entry *debug_bos;
3351
3352 if (!entry) {
3353 debug_bos = (struct tu_debug_bos_entry *) calloc(
3354 1, sizeof(struct tu_debug_bos_entry));
3355 debug_bos->name = strdup(name);
3356 _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
3357 } else {
3358 debug_bos = (struct tu_debug_bos_entry *) entry->data;
3359 }
3360
3361 debug_bos->count++;
3362 debug_bos->size += align(size, 4096);
3363 mtx_unlock(&dev->bo_mutex);
3364
3365 return debug_bos->name;
3366 }
3367
3368 void
tu_debug_bos_del(struct tu_device * dev,struct tu_bo * bo)3369 tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
3370 {
3371 if (likely(!dev->bo_sizes) || !bo->name)
3372 return;
3373
3374 mtx_lock(&dev->bo_mutex);
3375 struct hash_entry *entry =
3376 _mesa_hash_table_search(dev->bo_sizes, bo->name);
3377 /* If we're finishing the BO, it should have been added already */
3378 assert(entry);
3379
3380 struct tu_debug_bos_entry *debug_bos =
3381 (struct tu_debug_bos_entry *) entry->data;
3382 debug_bos->count--;
3383 debug_bos->size -= align(bo->size, 4096);
3384 if (!debug_bos->count) {
3385 _mesa_hash_table_remove(dev->bo_sizes, entry);
3386 free((void *) debug_bos->name);
3387 free(debug_bos);
3388 }
3389 mtx_unlock(&dev->bo_mutex);
3390 }
3391
debug_bos_count_compare(const void * in_a,const void * in_b)3392 static int debug_bos_count_compare(const void *in_a, const void *in_b)
3393 {
3394 struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
3395 struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
3396 return a->count - b->count;
3397 }
3398
3399 void
tu_debug_bos_print_stats(struct tu_device * dev)3400 tu_debug_bos_print_stats(struct tu_device *dev)
3401 {
3402 if (likely(!dev->bo_sizes))
3403 return;
3404
3405 mtx_lock(&dev->bo_mutex);
3406
3407 /* Put the HT's sizes data in an array so we can sort by number of allocations. */
3408 struct util_dynarray dyn;
3409 util_dynarray_init(&dyn, NULL);
3410
3411 uint32_t size = 0;
3412 uint32_t count = 0;
3413 hash_table_foreach(dev->bo_sizes, entry)
3414 {
3415 struct tu_debug_bos_entry *debug_bos =
3416 (struct tu_debug_bos_entry *) entry->data;
3417 util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
3418 size += debug_bos->size / 1024;
3419 count += debug_bos->count;
3420 }
3421
3422 qsort(dyn.data,
3423 util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
3424 sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);
3425
3426 util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
3427 {
3428 struct tu_debug_bos_entry *debug_bos = *entryp;
3429 mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
3430 (long long) (debug_bos->size / 1024));
3431 }
3432
3433 mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
3434
3435 util_dynarray_fini(&dyn);
3436
3437 mtx_unlock(&dev->bo_mutex);
3438 }
3439
3440 void
tu_dump_bo_init(struct tu_device * dev,struct tu_bo * bo)3441 tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo)
3442 {
3443 bo->dump_bo_list_idx = ~0;
3444
3445 if (!FD_RD_DUMP(ENABLE))
3446 return;
3447
3448 mtx_lock(&dev->bo_mutex);
3449 uint32_t idx =
3450 util_dynarray_num_elements(&dev->dump_bo_list, struct tu_bo *);
3451 bo->dump_bo_list_idx = idx;
3452 util_dynarray_append(&dev->dump_bo_list, struct tu_bo *, bo);
3453 mtx_unlock(&dev->bo_mutex);
3454 }
3455
3456 void
tu_dump_bo_del(struct tu_device * dev,struct tu_bo * bo)3457 tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo)
3458 {
3459 if (bo->dump_bo_list_idx != ~0) {
3460 mtx_lock(&dev->bo_mutex);
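      /* Remove in O(1) by popping the last element and swapping it into this
       * BO's slot, then updating the moved BO's cached index.
       */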
3461 struct tu_bo *exchanging_bo =
3462 util_dynarray_pop(&dev->dump_bo_list, struct tu_bo *);
3463 *util_dynarray_element(&dev->dump_bo_list, struct tu_bo *,
3464 bo->dump_bo_list_idx) = exchanging_bo;
3465 exchanging_bo->dump_bo_list_idx = bo->dump_bo_list_idx;
3466 mtx_unlock(&dev->bo_mutex);
3467 }
3468 }
3469
3470 void
tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,const VkDebugUtilsLabelEXT * pLabelInfo)3471 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
3472 const VkDebugUtilsLabelEXT *pLabelInfo)
3473 {
3474 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3475
3476 vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
3477
3478 /* Note that the spec says:
3479 *
3480 * "An application may open a debug label region in one command buffer and
3481 * close it in another, or otherwise split debug label regions across
3482 * multiple command buffers or multiple queue submissions. When viewed
3483 * from the linear series of submissions to a single queue, the calls to
3484 * vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
3485 * matched and balanced."
3486 *
3487 * But if you're beginning labeling during a renderpass and ending outside
3488 * it, or vice versa, these trace ranges in perfetto will be unbalanced. I
3489 * expect that u_trace and perfetto will do something like take just one of
3490 * the begins/ends, or drop the event entirely, but not crash. Similarly,
3491 * I think we'll have problems if the tracepoints are split across cmd
3492 * buffers. Still, getting the simple case of cmd buffer annotation into
3493 * perfetto should prove useful.
3494 */
3495 const char *label = pLabelInfo->pLabelName;
3496 if (cmd_buffer->state.pass) {
3497 trace_start_cmd_buffer_annotation_rp(
3498 &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
3499 } else {
3500 trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
3501 strlen(label), label);
3502 }
3503 }
3504
3505 void
tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)3506 tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
3507 {
3508 VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3509
3510 if (cmd_buffer->vk.labels.size > 0) {
3511 if (cmd_buffer->state.pass) {
3512 trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
3513 &cmd_buffer->draw_cs);
3514 } else {
3515 trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
3516 }
3517 }
3518
3519 vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
3520 }
3521
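/* Illustrative only: applications (or layers/tools) bracket work with debug
 * labels, which the hooks above turn into u_trace/perfetto annotations; the
 * label text and `cmd_buf` are placeholders.
 *
 *    VkDebugUtilsLabelEXT label = {
 *       .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT,
 *       .pLabelName = "shadow pass",
 *    };
 *    vkCmdBeginDebugUtilsLabelEXT(cmd_buf, &label);
 *    // ... record draws ...
 *    vkCmdEndDebugUtilsLabelEXT(cmd_buf);
 */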