/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>
#include <sys/sysinfo.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_query.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define TU_HAS_SURFACE 1
#else
#define TU_HAS_SURFACE 0
#endif

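/* The pipeline cache UUID hashes the driver build (via its disk-cache
 * function identifier) together with the GPU family and the debug flags that
 * affect code generation, so on-disk caches are invalidated whenever any of
 * those change.
 */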
static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

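/* Advertise Vulkan 1.3, with the patch version taken from the Vulkan
 * headers we were built against.
 */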
#define TU_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = TU_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
#endif
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2 = true,
#endif
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = TU_HAS_SURFACE,
   .KHR_surface = TU_HAS_SURFACE,
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
   .EXT_swapchain_colorspace = TU_HAS_SURFACE,
} };

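/* Whether we are running on Qualcomm's downstream kgsl kernel interface
 * rather than upstream DRM; DRM-specific functionality such as
 * VK_EXT_physical_device_drm is disabled in that case.
 */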
static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
      .KHR_incremental_present = TU_HAS_SURFACE,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,

      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way.
       */
      .KHR_present_id =
         TU_HAS_SURFACE && (driQueryOptionb(&device->instance->dri_options,
                                            "vk_khr_present_wait") ||
                            wsi_common_vk_instance_supports_present_wait(
                               &device->instance->vk)),
      .KHR_present_wait =
         TU_HAS_SURFACE && (driQueryOptionb(&device->instance->dri_options,
                                            "vk_khr_present_wait") ||
                            wsi_common_vk_instance_supports_present_wait(
                               &device->instance->vk)),

      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_swapchain = TU_HAS_SURFACE,
      .KHR_swapchain_mutable_format = TU_HAS_SURFACE,
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = !device->info->a7xx.load_shader_consts_via_preamble,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
#if DETECT_OS_ANDROID
      .ANDROID_native_buffer = true,
#endif
      .ARM_rasterization_order_attachment_access = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
   } };
}

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = false;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = false;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess = false;
   features->storagePushConstant16 = false;
   features->storageInputOutput16 = false;
   features->multiview = true;
   features->multiviewGeometryShader = false;
   features->multiviewTessellationShader = false;
   features->variablePointersStorageBuffer = true;
   features->variablePointers = true;
   features->protectedMemory = false;
   features->samplerYcbcrConversion = true;
   features->shaderDrawParameters = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge = true;
   features->drawIndirectCount = true;
   features->storageBuffer8BitAccess = false;
   features->uniformAndStorageBuffer8BitAccess = false;
   features->storagePushConstant8 = false;
   features->shaderBufferInt64Atomics = false;
   features->shaderSharedInt64Atomics = false;
   features->shaderFloat16 = true;
   features->shaderInt8 = false;

   features->descriptorIndexing = true;
   features->shaderInputAttachmentArrayDynamicIndexing = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing = true;
   features->shaderUniformBufferArrayNonUniformIndexing = true;
   features->shaderSampledImageArrayNonUniformIndexing = true;
   features->shaderStorageBufferArrayNonUniformIndexing = true;
   features->shaderStorageImageArrayNonUniformIndexing = true;
   features->shaderInputAttachmentArrayNonUniformIndexing = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   features->descriptorBindingUniformBufferUpdateAfterBind = true;
   features->descriptorBindingSampledImageUpdateAfterBind = true;
   features->descriptorBindingStorageImageUpdateAfterBind = true;
   features->descriptorBindingStorageBufferUpdateAfterBind = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending = true;
   features->descriptorBindingPartiallyBound = true;
   features->descriptorBindingVariableDescriptorCount = true;
   features->runtimeDescriptorArray = true;

   features->samplerFilterMinmax =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout = true;
   features->imagelessFramebuffer = true;
   features->uniformBufferStandardLayout = true;
   features->shaderSubgroupExtendedTypes = true;
   features->separateDepthStencilLayouts = true;
   features->hostQueryReset = true;
   features->timelineSemaphore = true;
   features->bufferDeviceAddress = true;
   features->bufferDeviceAddressCaptureReplay = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice = false;
   features->vulkanMemoryModel = true;
   features->vulkanMemoryModelDeviceScope = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex = true;
   features->shaderOutputLayer = true;
   features->subgroupBroadcastDynamicId = true;

   /* Vulkan 1.3 */
   features->robustImageAccess = true;
   features->inlineUniformBlock = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl = true;
   features->privateData = true;
   features->shaderDemoteToHelperInvocation = true;
   features->shaderTerminateInvocation = true;
   features->subgroupSizeControl = true;
   features->computeFullSubgroups = true;
   features->synchronization2 = true;
   features->textureCompressionASTC_HDR = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering = true;
   features->shaderIntegerDotProduct = true;
   features->maintenance4 = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_index_type_uint8 */
   features->indexTypeUint8 = true;

   /* VK_EXT_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode = false;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable = true;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

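   /* A wave is 64 fibers, or 128 on HW that supports double threadsize. */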
   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

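/* A single texture descriptor occupies A6XX_TEX_CONST_DWORDS 32-bit dwords. */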
static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16 = true;
   p->shaderDenormPreserveFloat16 = false;
   p->shaderRoundingModeRTEFloat16 = true;
   p->shaderRoundingModeRTZFloat16 = false;
   p->shaderSignedZeroInfNanPreserveFloat16 = true;

   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = false;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = false;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat64 = false;
   p->shaderDenormPreserveFloat64 = false;
   p->shaderRoundingModeRTEFloat64 = false;
   p->shaderRoundingModeRTZFloat64 = false;
   p->shaderSignedZeroInfNanPreserveFloat64 = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative = true;
   p->shaderSampledImageArrayNonUniformIndexingNative = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative = true;
   p->shaderStorageImageArrayNonUniformIndexingNative = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = false;
   p->quadDivergentImplicitLod = false;

   p->maxUpdateAfterBindDescriptorsInAllPools = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = MAX_RTS;

   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone = false;
   p->independentResolve = false;

   p->filterMinmaxSingleComponentFormats = true;
   p->filterMinmaxImageComponentMapping = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
   p->minSubgroupSize = 64; /* threadsize_base */
   p->maxSubgroupSize =
      pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
   p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

static void
tu_get_properties(struct tu_physical_device *pdevice,
                  struct vk_properties *props)
{
   /* Limits */
   props->maxImageDimension1D = (1 << 14);
   props->maxImageDimension2D = (1 << 14);
   props->maxImageDimension3D = (1 << 11);
   props->maxImageDimensionCube = (1 << 14);
   props->maxImageArrayLayers = (1 << 11);
   props->maxTexelBufferElements = 128 * 1024 * 1024;
   props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
   props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
   props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
   props->maxMemoryAllocationCount = UINT32_MAX;
   props->maxSamplerAllocationCount = 64 * 1024;
   props->bufferImageGranularity = 64; /* A cache line */
   props->sparseAddressSpaceSize = 0;
   props->maxBoundDescriptorSets = pdevice->usable_sets;
   props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
   props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
   props->maxPerStageDescriptorInputAttachments = MAX_RTS;
   props->maxPerStageResources = max_descriptor_set_size;
   props->maxDescriptorSetSamplers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
   props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   props->maxDescriptorSetSampledImages = max_descriptor_set_size;
   props->maxDescriptorSetStorageImages = max_descriptor_set_size;
   props->maxDescriptorSetInputAttachments = MAX_RTS;
   props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputAttributeOffset = 4095;
   props->maxVertexInputBindingStride = 2048;
   props->maxVertexOutputComponents = 128;
   props->maxTessellationGenerationLevel = 64;
   props->maxTessellationPatchSize = 32;
   props->maxTessellationControlPerVertexInputComponents = 128;
   props->maxTessellationControlPerVertexOutputComponents = 128;
   props->maxTessellationControlPerPatchOutputComponents = 120;
   props->maxTessellationControlTotalOutputComponents = 4096;
   props->maxTessellationEvaluationInputComponents = 128;
   props->maxTessellationEvaluationOutputComponents = 128;
   props->maxGeometryShaderInvocations = 32;
   props->maxGeometryInputComponents = 64;
   props->maxGeometryOutputComponents = 128;
   props->maxGeometryOutputVertices = 256;
   props->maxGeometryTotalOutputComponents = 1024;
   props->maxFragmentInputComponents = 124;
   props->maxFragmentOutputAttachments = 8;
   props->maxFragmentDualSrcAttachments = 1;
   props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
   props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
   props->maxComputeWorkGroupCount[0] =
      props->maxComputeWorkGroupCount[1] =
      props->maxComputeWorkGroupCount[2] = 65535;
   props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ? 2048 : 1024;
   props->maxComputeWorkGroupSize[0] =
      props->maxComputeWorkGroupSize[1] =
      props->maxComputeWorkGroupSize[2] = 1024;
   props->subPixelPrecisionBits = 8;
   props->subTexelPrecisionBits = 8;
   props->mipmapPrecisionBits = 8;
   props->maxDrawIndexedIndexValue = UINT32_MAX;
   props->maxDrawIndirectCount = UINT32_MAX;
   props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
   props->maxSamplerAnisotropy = 16;
   props->maxViewports =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   props->maxViewportDimensions[0] =
      props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
   props->viewportBoundsRange[0] = INT16_MIN;
   props->viewportBoundsRange[1] = INT16_MAX;
   props->viewportSubPixelBits = 8;
   props->minMemoryMapAlignment = 4096; /* A page */
   props->minTexelBufferOffsetAlignment = 64;
   props->minUniformBufferOffsetAlignment = 64;
   props->minStorageBufferOffsetAlignment = 4;
   props->minTexelOffset = -16;
   props->maxTexelOffset = 15;
   props->minTexelGatherOffset = -32;
   props->maxTexelGatherOffset = 31;
   props->minInterpolationOffset = -0.5;
   props->maxInterpolationOffset = 0.4375;
   props->subPixelInterpolationOffsetBits = 4;
   props->maxFramebufferWidth = (1 << 14);
   props->maxFramebufferHeight = (1 << 14);
   props->maxFramebufferLayers = (1 << 10);
   props->framebufferColorSampleCounts = sample_counts;
   props->framebufferDepthSampleCounts = sample_counts;
   props->framebufferStencilSampleCounts = sample_counts;
   props->framebufferNoAttachmentsSampleCounts = sample_counts;
   props->maxColorAttachments = MAX_RTS;
   props->sampledImageColorSampleCounts = sample_counts;
   props->sampledImageIntegerSampleCounts = sample_counts;
   props->sampledImageDepthSampleCounts = sample_counts;
   props->sampledImageStencilSampleCounts = sample_counts;
   props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
   props->maxSampleMaskWords = 1;
   props->timestampComputeAndGraphics = true;
   props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
   props->maxClipDistances = 8;
   props->maxCullDistances = 8;
   props->maxCombinedClipAndCullDistances = 8;
   props->discreteQueuePriorities = 2;
   props->pointSizeRange[0] = 1;
   props->pointSizeRange[1] = 4092;
   props->lineWidthRange[0] =
      props->lineWidthRange[1] = 1.0;
   props->pointSizeGranularity = 0.0625;
   props->lineWidthGranularity = 0.0;
   props->strictLines = true;
   props->standardSampleLocations = true;
   props->optimalBufferCopyOffsetAlignment = 128;
   props->optimalBufferCopyRowPitchAlignment = 128;
   props->nonCoherentAtomSize = 64;

   props->apiVersion =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
         TU_API_VERSION : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
   props->driverVersion = vk_get_driver_version();
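   /* Qualcomm's vendor id, ASCII "QC" */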
   props->vendorID = 0x5143;
   props->deviceID = pdevice->dev_id.chip_id;
   props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;

   /* sparse properties */
   props->sparseResidencyStandard2DBlockShape = { 0 };
   props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
   props->sparseResidencyStandard3DBlockShape = { 0 };
   props->sparseResidencyAlignedMipSize = { 0 };
   props->sparseResidencyNonResidentStrict = { 0 };

   strcpy(props->deviceName, pdevice->name);
   memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   tu_get_physical_device_properties_1_1(pdevice, props);
   tu_get_physical_device_properties_1_2(pdevice, props);
   tu_get_physical_device_properties_1_3(pdevice, props);

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts = 0;
   if (pdevice->vk.supported_extensions.EXT_sample_locations) {
      props->sampleLocationSampleCounts =
         VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
   }
   props->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_EXT_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageBufferDescriptorSize =
      pdevice->info->a6xx.storage_16bit ?
         2 * A6XX_TEX_CONST_DWORDS * 4 :
         A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageBufferDescriptorSize =
      props->storageBufferDescriptorSize;
   props->inputAttachmentDescriptorSize = TU_DEBUG(DYNAMIC) ?
      A6XX_TEX_CONST_DWORDS * 4 : 0;
   props->maxSamplerDescriptorBufferRange = ~0ull;
   props->maxResourceDescriptorBufferRange = ~0ull;
   props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
   props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
   props->descriptorBufferAddressSpaceSize = ~0ull;
   props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;

   /* VK_EXT_fragment_density_map */
   props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
   props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
   props->fragmentDensityInvocations = false;

   /* VK_KHR_maintenance5 */
   props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
   props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
   props->depthStencilSwizzleOneSupport = true;
   props->polygonModePointSize = true;
   props->nonStrictWideLinesUseParallelogram = false;
   props->nonStrictSinglePixelWideLinesUseParallelogram = false;
}

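/* Pipeline cache object types that can be recreated when importing raw
 * cache data supplied by the application.
 */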
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shader_ops,
   &tu_nir_shaders_ops,
   NULL,
};

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (!fd_name) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "device (chip_id = %" PRIX64
                               ", gpu_id = %u) is unsupported",
                               device->dev_id.chip_id, device->dev_id.gpu_id);
   }

   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info info = fd_dev_info(&device->dev_id);
   if (!info.chip) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
   case 7: {
      device->dev_info = info;
      device->info = &device->dev_info;
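      /* Judging by the offsets computed below: for sysmem (bypass)
       * rendering the CCU depth and color caches are carved out at the
       * bottom of GMEM, while for GMEM rendering only a fraction of the
       * color cache is reserved, at the top, leaving the rest usable as
       * the tile buffer.
       */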
      uint32_t depth_cache_size =
         device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
      uint32_t color_cache_size =
         (device->info->num_ccu *
          device->info->a6xx.sysmem_per_ccu_color_cache_size);
      uint32_t color_cache_size_gmem =
         color_cache_size /
         (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);

      device->ccu_depth_offset_bypass = 0;
      device->ccu_offset_bypass =
         device->ccu_depth_offset_bypass + depth_cache_size;

      if (device->info->a7xx.has_gmem_vpc_attr_buf) {
         device->vpc_attr_buf_size_bypass =
            device->info->a7xx.sysmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_bypass =
            device->ccu_offset_bypass + color_cache_size;

         device->vpc_attr_buf_size_gmem =
            device->info->a7xx.gmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_gmem =
            device->gmem_size -
            (device->vpc_attr_buf_size_gmem * device->info->num_ccu);

         device->ccu_offset_gmem =
            device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;

         device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
      } else {
         device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
         device->usable_gmem_size_gmem = device->gmem_size;
      }

      if (instance->reserve_descriptor_set) {
         device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
      } else {
         device->usable_sets = device->info->a6xx.max_sets;
         device->reserved_set_idx = -1;
      }
      break;
   }
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

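   /* All memory types are device-local and host-visible (UMA). The base
    * type is presumably write-combined, given it is coherent but not
    * cached; cached variants are added below when the kernel exposes them.
    */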
   device->memory.type_count = 1;
   device->memory.types[0] =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

   if (device->has_cached_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   if (device->has_cached_non_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_name;

   get_device_extensions(device, &device->vk.supported_extensions);
   tu_get_features(device, &device->vk.supported_features);
   tu_get_properties(device, &device->vk.properties);

   device->vk.supported_sync_types = device->sync_types;

#if TU_HAS_SURFACE
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_name;
   }
#endif

   /* The gpu id is already embedded in the cache uuid, so it doesn't need
    * to be encoded in the cache name as well.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
   device->vk.disk_cache = disk_cache_create(device->name, buf, 0);

   device->vk.pipeline_cache_import_ops = cache_import_ops;

   return VK_SUCCESS;

fail_free_name:
   vk_free(&instance->vk.alloc, (void *)device->name);
   return result;
}

static void
tu_physical_device_finish(struct tu_physical_device *device)
{
#if TU_HAS_SURFACE
   tu_wsi_finish(device);
#endif

   close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);

   disk_cache_destroy(device->vk.disk_cache);
   vk_free(&device->instance->vk.alloc, (void *)device->name);

   vk_physical_device_finish(&device->vk);
}

static void
tu_destroy_physical_device(struct vk_physical_device *device)
{
   tu_physical_device_finish((struct tu_physical_device *) device);
   vk_free(&device->instance->alloc, device);
}

static const driOptionDescription tu_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_MISCELLANEOUS
      DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
      DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
      DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
   DRI_CONF_SECTION_END
};

static void
tu_init_dri_options(struct tu_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
                      ARRAY_SIZE(tu_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->dont_care_as_load =
      driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
   instance->conservative_lrz =
      !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
   instance->reserve_descriptor_set =
      !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
   instance->allow_oob_indirect_ubo_loads =
      driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkInstance *pInstance)
{
   struct tu_instance *instance;
   VkResult result;

   tu_env_init();

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = (struct tu_instance *) vk_zalloc(
      pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &tu_instance_extensions_supported,
                             &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->vk.physical_devices.try_create_for_drm =
      tu_physical_device_try_create;
   instance->vk.physical_devices.enumerate = tu_enumerate_devices;
   instance->vk.physical_devices.destroy = tu_destroy_physical_device;

   if (TU_DEBUG(STARTUP))
      mesa_logi("Created an instance");

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   tu_init_dri_options(instance);

   *pInstance = tu_instance_to_handle(instance);

#ifdef HAVE_PERFETTO
   tu_perfetto_init();
#endif

   util_gpuvis_init();

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,
                   const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

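/* A single queue family: one queue supporting graphics, compute and
 * transfer.
 */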
1369 static const VkQueueFamilyProperties tu_queue_family_properties = {
1370 .queueFlags =
1371 VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1372 .queueCount = 1,
1373 .timestampValidBits = 48,
1374 .minImageTransferGranularity = { 1, 1, 1 },
1375 };
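/* For reference, a minimal app-side sketch (illustrative, not driver code)
 * of querying these properties together with the global-priority struct
 * handled below:
 *
 *    VkQueueFamilyGlobalPriorityPropertiesKHR prio = {
 *       .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR,
 *    };
 *    VkQueueFamilyProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_QUEUE_FAMILY_PROPERTIES_2,
 *       .pNext = &prio,
 *    };
 *    uint32_t count = 1;
 *    vkGetPhysicalDeviceQueueFamilyProperties2(physical_device, &count, &props);
 */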
1376
1377 static void
1378 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1379 VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1380 {
1381 props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1382 switch (props->priorityCount) {
1383 case 1:
1384 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1385 break;
1386 case 2:
1387 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1388 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1389 break;
1390 case 3:
1391 props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1392 props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1393 props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1394 break;
1395 default:
1396 unreachable("unexpected priority count");
1397 break;
1398 }
1399 }
1400
1401 static int
1402 tu_physical_device_get_submitqueue_priority(const struct tu_physical_device *pdevice,
1403 VkQueueGlobalPriorityKHR global_priority,
1404 bool global_priority_query)
1405 {
1406 if (global_priority_query) {
1407 VkQueueFamilyGlobalPriorityPropertiesKHR props;
1408 tu_physical_device_get_global_priority_properties(pdevice, &props);
1409
1410 bool valid = false;
1411 for (uint32_t i = 0; i < props.priorityCount; i++) {
1412 if (props.priorities[i] == global_priority) {
1413 valid = true;
1414 break;
1415 }
1416 }
1417
1418 if (!valid)
1419 return -1;
1420 }
1421
1422 /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
1423 * with 0 being the highest priority. This matches what freedreno does.
1424 */
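/* Worked example (illustrative): with submitqueue_priority_count == 3,
 * MEDIUM maps to 3 / 2 == 1, LOW (or anything below MEDIUM) maps to
 * 3 - 1 == 2, and HIGH (or anything above MEDIUM) maps to 0.
 */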
1425 int priority;
1426 if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1427 priority = pdevice->submitqueue_priority_count / 2;
1428 else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1429 priority = pdevice->submitqueue_priority_count - 1;
1430 else
1431 priority = 0;
1432
1433 return priority;
1434 }
1435
1436 VKAPI_ATTR void VKAPI_CALL
1437 tu_GetPhysicalDeviceQueueFamilyProperties2(
1438 VkPhysicalDevice physicalDevice,
1439 uint32_t *pQueueFamilyPropertyCount,
1440 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1441 {
1442 TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1443
1444 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1445 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1446
1447 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1448 {
1449 p->queueFamilyProperties = tu_queue_family_properties;
1450
1451 vk_foreach_struct(ext, p->pNext) {
1452 switch (ext->sType) {
1453 case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1454 VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1455 (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1456 tu_physical_device_get_global_priority_properties(pdevice, props);
1457 break;
1458 }
1459 default:
1460 break;
1461 }
1462 }
1463 }
1464 }
1465
1466 uint64_t
1467 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1468 {
1469 struct sysinfo info;
1470 sysinfo(&info);
1471
1472 uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;
1473
1474 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
1475 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
1476 */
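/* E.g. (illustrative): 4GiB of RAM yields a 2GiB heap, while 8GiB of RAM
 * yields a 6GiB heap, before the va_size clamp below.
 */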
1477 uint64_t available_ram;
1478 if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1479 available_ram = total_ram / 2;
1480 else
1481 available_ram = total_ram * 3 / 4;
1482
1483 if (physical_device->va_size)
1484 available_ram = MIN2(available_ram, physical_device->va_size);
1485
1486 return available_ram;
1487 }
1488
1489 static VkDeviceSize
1490 tu_get_budget_memory(struct tu_physical_device *physical_device)
1491 {
1492 uint64_t heap_size = physical_device->heap.size;
1493 uint64_t heap_used = physical_device->heap.used;
1494 uint64_t sys_available;
1495 ASSERTED bool has_available_memory =
1496 os_get_available_system_memory(&sys_available);
1497 assert(has_available_memory);
1498
1499 if (physical_device->va_size)
1500 sys_available = MIN2(sys_available, physical_device->va_size);
1501
1502 /*
1503 * Let's not incite the app to starve the system: report at most 90% of
1504 * available system memory.
1505 */
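/* Worked example (illustrative): with a 6GiB heap of which 1GiB is used
 * and 4GiB of system memory still available, the reported budget is
 * MIN2(6GiB, 1GiB + 3.6GiB) = 4.6GiB.
 */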
1506 uint64_t heap_available = sys_available * 9 / 10;
1507 return MIN2(heap_size, heap_used + heap_available);
1508 }
1509
1510 VKAPI_ATTR void VKAPI_CALL
1511 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1512 VkPhysicalDeviceMemoryProperties2 *props2)
1513 {
1514 TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1515
1516 VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1517 props->memoryHeapCount = 1;
1518 props->memoryHeaps[0].size = physical_device->heap.size;
1519 props->memoryHeaps[0].flags = physical_device->heap.flags;
1520
1521 props->memoryTypeCount = physical_device->memory.type_count;
1522 for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1523 props->memoryTypes[i] = (VkMemoryType) {
1524 .propertyFlags = physical_device->memory.types[i],
1525 .heapIndex = 0,
1526 };
1527 }
1528
1529 vk_foreach_struct(ext, props2->pNext)
1530 {
1531 switch (ext->sType) {
1532 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1533 VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1534 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1535 memory_budget_props->heapUsage[0] = physical_device->heap.used;
1536 memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1537
1538 /* The heapBudget and heapUsage values must be zero for array elements
1539 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount.
1540 */
1541 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1542 memory_budget_props->heapBudget[i] = 0u;
1543 memory_budget_props->heapUsage[i] = 0u;
1544 }
1545 break;
1546 }
1547 default:
1548 break;
1549 }
1550 }
1551 }
1552
1553 static VkResult
1554 tu_queue_init(struct tu_device *device,
1555 struct tu_queue *queue,
1556 int idx,
1557 const VkDeviceQueueCreateInfo *create_info,
1558 bool global_priority_query)
1559 {
1560 const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
1561 vk_find_struct_const(create_info->pNext,
1562 DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
1563 const enum VkQueueGlobalPriorityKHR global_priority = priority_info ?
1564 priority_info->globalPriority : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1565
1566 const int priority = tu_physical_device_get_submitqueue_priority(
1567 device->physical_device, global_priority, global_priority_query);
1568 if (priority < 0) {
1569 return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1570 "invalid global priority");
1571 }
1572
1573 VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1574 if (result != VK_SUCCESS)
1575 return result;
1576
1577 queue->device = device;
1578 queue->priority = priority;
1579 queue->vk.driver_submit = tu_queue_submit;
1580
1581 int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
1582 if (ret)
1583 return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1584 "submitqueue create failed");
1585
1586 queue->fence = -1;
1587
1588 return VK_SUCCESS;
1589 }
1590
1591 static void
1592 tu_queue_finish(struct tu_queue *queue)
1593 {
1594 vk_queue_finish(&queue->vk);
1595 tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1596 }
1597
1598 uint64_t
1599 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1600 {
1601 /* This is based on the 19.2MHz always-on rbbm timer.
1602 *
1603 * TODO we should probably query this value from the kernel.
1604 */
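/* 1000000000 / 19200000 truncates to 52, i.e. each tick is counted as
 * 52ns even though the exact period is ~52.08ns, so long intervals
 * accumulate a small error.
 */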
1605 return ts * (1000000000 / 19200000);
1606 }
1607
1608 struct u_trace_context *
1609 tu_device_get_u_trace(struct tu_device *device)
1610 {
1611 return &device->trace_context;
1612 }
1613
1614 static void*
1615 tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
1616 {
1617 struct tu_device *device =
1618 container_of(utctx, struct tu_device, trace_context);
1619
1620 struct tu_bo *bo;
1621 tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_NO_FLAGS, "trace");
1622
1623 return bo;
1624 }
1625
1626 static void
1627 tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
1628 {
1629 struct tu_device *device =
1630 container_of(utctx, struct tu_device, trace_context);
1631 struct tu_bo *bo = (struct tu_bo *) timestamps;
1632
1633 tu_bo_finish(device, bo);
1634 }
1635
1636 template <chip CHIP>
1637 static void
1638 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1639 unsigned idx, bool end_of_pipe)
1640 {
1641 struct tu_bo *bo = (struct tu_bo *) timestamps;
1642 struct tu_cs *ts_cs = (struct tu_cs *) cs;
1643
1644 unsigned ts_offset = idx * sizeof(uint64_t);
1645
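/* Both paths below ask the CP to write a 64-bit timestamp from the
 * always-on timer to bo->iova + ts_offset once preceding work completes:
 * A6XX via CP_EVENT_WRITE with the TIMESTAMP flag, A7XX+ via
 * CP_EVENT_WRITE7 with the always-on counter as the write source.
 */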
1646 if (CHIP == A6XX) {
1647 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1648 tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1649 CP_EVENT_WRITE_0_TIMESTAMP);
1650 tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1651 tu_cs_emit(ts_cs, 0x00000000);
1652 } else {
1653 tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1654 tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1655 .write_src = EV_WRITE_ALWAYSON,
1656 .write_dst = EV_DST_RAM,
1657 .write_enabled = true)
1658 .value);
1659 tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1660 }
1661 }
1662
1663 static uint64_t
1664 tu_trace_read_ts(struct u_trace_context *utctx,
1665 void *timestamps, unsigned idx, void *flush_data)
1666 {
1667 struct tu_device *device =
1668 container_of(utctx, struct tu_device, trace_context);
1669 struct tu_bo *bo = (struct tu_bo *) timestamps;
1670 struct tu_u_trace_submission_data *submission_data =
1671 (struct tu_u_trace_submission_data *) flush_data;
1672
1673 /* Only need to stall on results for the first entry: */
1674 if (idx == 0) {
1675 tu_device_wait_u_trace(device, submission_data->syncobj);
1676 }
1677
1678 if (tu_bo_map(device, bo) != VK_SUCCESS) {
1679 return U_TRACE_NO_TIMESTAMP;
1680 }
1681
1682 uint64_t *ts = (uint64_t *) bo->map;
1683
1684 /* Don't translate the no-timestamp marker: */
1685 if (ts[idx] == U_TRACE_NO_TIMESTAMP)
1686 return U_TRACE_NO_TIMESTAMP;
1687
1688 return tu_device_ticks_to_ns(device, ts[idx]);
1689 }
1690
1691 static void
1692 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1693 {
1694 struct tu_device *device =
1695 container_of(utctx, struct tu_device, trace_context);
1696 struct tu_u_trace_submission_data *submission_data =
1697 (struct tu_u_trace_submission_data *) flush_data;
1698
1699 tu_u_trace_submission_data_finish(device, submission_data);
1700 }
1701
1702 void
1703 tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
1704 void *ts_from, uint32_t from_offset,
1705 void *ts_to, uint32_t to_offset,
1706 uint32_t count)
1707 {
1708 struct tu_cs *cs = (struct tu_cs *) cmdstream;
1709 struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1710 struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1711
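/* CP_MEMCPY payload (5 dwords): length in dwords, then 64-bit source and
 * destination addresses; each timestamp is a uint64_t, i.e. two dwords,
 * hence count * 2.
 */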
1712 tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1713 tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
1714 tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
1715 tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
1716 }
1717
1718 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1719 * that ignore tracepoints at the beginning/end that are part of a
1720 * suspend/resume chain.
1721 */
1722 static struct u_trace_iterator
1723 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1724 {
1725 switch (cmdbuf->state.suspend_resume) {
1726 case SR_IN_PRE_CHAIN:
1727 return cmdbuf->trace_renderpass_end;
1728 case SR_AFTER_PRE_CHAIN:
1729 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1730 return cmdbuf->pre_chain.trace_renderpass_end;
1731 default:
1732 return u_trace_begin_iterator(&cmdbuf->trace);
1733 }
1734 }
1735
1736 static struct u_trace_iterator
1737 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1738 {
1739 switch (cmdbuf->state.suspend_resume) {
1740 case SR_IN_PRE_CHAIN:
1741 return cmdbuf->trace_renderpass_end;
1742 case SR_IN_CHAIN:
1743 case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1744 return cmdbuf->trace_renderpass_start;
1745 default:
1746 return u_trace_end_iterator(&cmdbuf->trace);
1747 }
1748 }
1749 VkResult
1750 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs **cs,
1751 struct u_trace **trace_copy)
1752 {
1753 *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
1754 sizeof(struct tu_cs), 8,
1755 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1756
1757 if (*cs == NULL) {
1758 return VK_ERROR_OUT_OF_HOST_MEMORY;
1759 }
1760
1761 tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1762 list_length(&cmdbuf->trace.trace_chunks) * 6 + 3, "trace copy timestamp cs");
1763
1764 tu_cs_begin(*cs);
1765
1766 tu_cs_emit_wfi(*cs);
1767 tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1768
1769 *trace_copy = (struct u_trace *) vk_zalloc(
1770 &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1771 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1772
1773 if (*trace_copy == NULL) {
1774 return VK_ERROR_OUT_OF_HOST_MEMORY;
1775 }
1776
1777 u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1778 u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
1779 tu_cmd_end_iterator(cmdbuf),
1780 *trace_copy, *cs,
1781 tu_copy_timestamp_buffer);
1782
1783 tu_cs_emit_wfi(*cs);
1784
1785 tu_cs_end(*cs);
1786
1787 return VK_SUCCESS;
1788 }
1789
1790 VkResult
1791 tu_u_trace_submission_data_create(
1792 struct tu_device *device,
1793 struct tu_cmd_buffer **cmd_buffers,
1794 uint32_t cmd_buffer_count,
1795 struct tu_u_trace_submission_data **submission_data)
1796 {
1797 *submission_data = (struct tu_u_trace_submission_data *)
1798 vk_zalloc(&device->vk.alloc,
1799 sizeof(struct tu_u_trace_submission_data), 8,
1800 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1801
1802 if (!(*submission_data)) {
1803 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1804 }
1805
1806 struct tu_u_trace_submission_data *data = *submission_data;
1807
1808 data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
1809 &device->vk.alloc,
1810 cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1811 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1812
1813 if (!data->cmd_trace_data) {
1814 goto fail;
1815 }
1816
1817 data->cmd_buffer_count = cmd_buffer_count;
1818 data->last_buffer_with_tracepoints = -1;
1819
1820 for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1821 struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1822
1823 if (!u_trace_has_points(&cmdbuf->trace))
1824 continue;
1825
1826 data->last_buffer_with_tracepoints = i;
1827
1828 if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1829 /* A single command buffer could be submitted several times, but we
1830 * already baked in the timestamp iova addresses, and trace points are
1831 * single-use. Therefore we have to copy the trace points and create a
1832 * new timestamp buffer on every submit of a reusable command buffer.
1833 */
1834 if (tu_create_copy_timestamp_cs(cmdbuf,
1835 &data->cmd_trace_data[i].timestamp_copy_cs,
1836 &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1837 goto fail;
1838 }
1839
1840 assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1841 } else {
1842 data->cmd_trace_data[i].trace = &cmdbuf->trace;
1843 }
1844 }
1845
1846 assert(data->last_buffer_with_tracepoints != -1);
1847
1848 return VK_SUCCESS;
1849
1850 fail:
1851 tu_u_trace_submission_data_finish(device, data);
1852 *submission_data = NULL;
1853
1854 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1855 }
1856
1857 void
1858 tu_u_trace_submission_data_finish(
1859 struct tu_device *device,
1860 struct tu_u_trace_submission_data *submission_data)
1861 {
1862 for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1863 /* Only free the trace if we had to create a copy of it above */
1864 struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
1865 if (cmd_data->timestamp_copy_cs) {
1866 tu_cs_finish(cmd_data->timestamp_copy_cs);
1867 vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
1868
1869 u_trace_fini(cmd_data->trace);
1870 vk_free(&device->vk.alloc, cmd_data->trace);
1871 }
1872 }
1873
1874 if (submission_data->kgsl_timestamp_bo.bo) {
1875 mtx_lock(&device->kgsl_profiling_mutex);
1876 tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
1877 &submission_data->kgsl_timestamp_bo);
1878 mtx_unlock(&device->kgsl_profiling_mutex);
1879 }
1880
1881 vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
1882 vk_free(&device->vk.alloc, submission_data->syncobj);
1883 vk_free(&device->vk.alloc, submission_data);
1884 }
1885
1886 enum tu_reg_stomper_flags
1887 {
1888 TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
1889 TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
1890 TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
1891 };
1892
1893 /* See freedreno.rst for usage tips */
1894 static const struct debug_named_value tu_reg_stomper_options[] = {
1895 { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
1896 "By default the range specifies the regs to stomp, with 'inverse' it "
1897 "specifies the regs NOT to stomp" },
1898 { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
1899 "Stomp regs at the start of a cmdbuf" },
1900 { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
1901 "Stomp regs before a renderpass" },
1902 { NULL, 0 }
1903 };
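/* Example invocation (illustrative values, not a known-good range):
 *
 *    TU_DEBUG_STALE_REGS_RANGE=0x8600,0x8800 \
 *    TU_DEBUG_STALE_REGS_FLAGS=inverse,renderpass ./app
 *
 * stomps every stompable reg outside 0x8600-0x8800 before each renderpass.
 */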
1904
1905 template <chip CHIP>
1906 static inline void
1907 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
1908 bool is_rp_blit,
1909 uint32_t first_reg,
1910 uint32_t last_reg,
1911 bool inverse)
1912 {
1913 const uint16_t *regs = NULL;
1914 size_t count = 0;
1915
1916 if (is_rp_blit) {
1917 regs = &RP_BLIT_REGS<CHIP>[0];
1918 count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
1919 } else {
1920 regs = &CMD_REGS<CHIP>[0];
1921 count = ARRAY_SIZE(CMD_REGS<CHIP>);
1922 }
1923
1924 for (size_t i = 0; i < count; i++) {
1925 if (inverse) {
1926 if (regs[i] >= first_reg && regs[i] <= last_reg)
1927 continue;
1928 } else {
1929 if (regs[i] < first_reg || regs[i] > last_reg)
1930 continue;
1931 }
1932
1933 if (fd_reg_stomp_allowed(CHIP, regs[i]))
1934 tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
1935 }
1936 }
1937
1938 static void
1939 tu_init_dbg_reg_stomper(struct tu_device *device)
1940 {
1941 const char *stale_reg_range_str =
1942 os_get_option("TU_DEBUG_STALE_REGS_RANGE");
1943 if (!stale_reg_range_str)
1944 return;
1945
1946 uint32_t first_reg, last_reg;
1947
1948 if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
1949 mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
1950 return;
1951 }
1952
1953 uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
1954 tu_reg_stomper_options,
1955 TU_DEBUG_REG_STOMP_CMDBUF);
1956
1957 struct tu_cs *cmdbuf_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
1958 tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
1959 "cmdbuf reg stomp cs");
1960 tu_cs_begin(cmdbuf_cs);
1961
1962 struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
1963 tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
1964 tu_cs_begin(rp_cs);
1965
1966 bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
1967 TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
1968 TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
1969
1970 tu_cs_end(cmdbuf_cs);
1971 tu_cs_end(rp_cs);
1972
1973 device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
1974 device->dbg_renderpass_stomp_cs = rp_cs;
1975 }
1976
1977 /* It is unknown what this workaround is for and what it fixes. */
1978 static VkResult
1979 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
1980 {
1981 struct tu_cs *cs;
1982
1983 if (!(device->cmdbuf_start_a725_quirk_cs =
1984 (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
1985 return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1986 "OOM");
1987 }
1988
1989 if (!(device->cmdbuf_start_a725_quirk_entry =
1990 (struct tu_cs_entry *) calloc(1, sizeof(struct tu_cs_entry)))) {
1991 free(device->cmdbuf_start_a725_quirk_cs);
1992 return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1993 "OOM");
1994 }
1995
1996 cs = device->cmdbuf_start_a725_quirk_cs;
1997 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 57, "a725 workaround cs");
1998
1999 struct tu_cs shader_cs;
2000 tu_cs_begin_sub_stream(cs, 10, &shader_cs);
2001
2002 uint32_t raw_shader[] = {
2003 0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2004 0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2005 0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2006 0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2007 0x00000000, 0x03000000, // end
2008 };
2009
2010 tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2011 struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(cs, &shader_cs);
2012 uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2013
2014 struct tu_cs sub_cs;
2015 tu_cs_begin_sub_stream(cs, 47, &sub_cs);
2016
2017 tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2018 .vs_state = true, .hs_state = true, .ds_state = true,
2019 .gs_state = true, .fs_state = true, .gfx_ibo = true,
2020 .cs_bindless = 0xff, .gfx_bindless = 0xff));
2021 tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2022 .constlen = 4,
2023 .enabled = true));
2024 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2025 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2026 .threadmode = MULTI,
2027 .threadsize = THREAD128,
2028 .mergedregs = true));
2029 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2030 tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2031 HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2032 HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2033 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2034 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2035 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2036 tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_CNTL_1(
2037 .linearlocalidregid = regid(63, 0),
2038 .threadsize = THREAD128,
2039 .unk11 = true,
2040 .unk22 = true,
2041 .yalign = CS_YALIGN_1));
2042 tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2043 .wgidconstid = regid(51, 3),
2044 .wgsizeconstid = regid(48, 0),
2045 .wgoffsetconstid = regid(63, 0),
2046 .localidregid = regid(63, 0)));
2047 tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2048 .linearlocalidregid = regid(63, 0),
2049 .threadsize = THREAD128,
2050 .unk15 = true));
2051 tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2052
2053 tu_cs_emit_regs(&sub_cs,
2054 HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2055 .localsizex = 255,
2056 .localsizey = 1,
2057 .localsizez = 1),
2058 HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2059 HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2060 HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2061 HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2062 HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2063 HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2064 tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2065 .localsizex = 255,
2066 .localsizey = 0,
2067 .localsizez = 0));
2068 tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2069 tu_cs_emit(&sub_cs, 0);
2070 tu_cs_emit_qw(&sub_cs, shader_iova);
2071
2072 tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2073 tu_cs_emit(&sub_cs, 0x00000000);
2074 tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2075 tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2076 tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2077
2078 *device->cmdbuf_start_a725_quirk_entry = tu_cs_end_sub_stream(cs, &sub_cs);
2079
2080 return VK_SUCCESS;
2081 }
2082
2083 VKAPI_ATTR VkResult VKAPI_CALL
2084 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2085 const VkDeviceCreateInfo *pCreateInfo,
2086 const VkAllocationCallbacks *pAllocator,
2087 VkDevice *pDevice)
2088 {
2089 TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2090 VkResult result;
2091 struct tu_device *device;
2092 bool custom_border_colors = false;
2093 bool perf_query_pools = false;
2094 bool robust_buffer_access2 = false;
2095 bool border_color_without_format = false;
2096 bool global_priority_query = false;
2097
2098 vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2099 switch (ext->sType) {
2100 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2101 const VkPhysicalDeviceCustomBorderColorFeaturesEXT
2102 *border_color_features =
2103 (const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext;
2104 custom_border_colors = border_color_features->customBorderColors;
2105 border_color_without_format =
2106 border_color_features->customBorderColorWithoutFormat;
2107 break;
2108 }
2109 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
2110 const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
2111 (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
2112 perf_query_pools = feature->performanceCounterQueryPools;
2113 break;
2114 }
2115 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
2116 VkPhysicalDeviceRobustness2FeaturesEXT *features =
2117 (VkPhysicalDeviceRobustness2FeaturesEXT *) ext;
2118 robust_buffer_access2 = features->robustBufferAccess2;
2119 break;
2120 }
2121 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
2122 VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
2123 (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *) ext;
2124 global_priority_query = features->globalPriorityQuery;
2125 break;
2126 }
2127 default:
2128 break;
2129 }
2130 }
2131
2132 device = (struct tu_device *) vk_zalloc2(
2133 &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2134 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2135 if (!device)
2136 return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2137
2138 struct vk_device_dispatch_table dispatch_table;
2139 vk_device_dispatch_table_from_entrypoints(
2140 &dispatch_table, &tu_device_entrypoints, true);
2141
2142 switch (fd_dev_gen(&physical_device->dev_id)) {
2143 case 6:
2144 vk_device_dispatch_table_from_entrypoints(
2145 &dispatch_table, &tu_device_entrypoints_a6xx, false);
2146 break;
2147 case 7:
2148 vk_device_dispatch_table_from_entrypoints(
2149 &dispatch_table, &tu_device_entrypoints_a7xx, false);
2150 }
2151
2152 vk_device_dispatch_table_from_entrypoints(
2153 &dispatch_table, &wsi_device_entrypoints, false);
2154
2155 const struct vk_device_entrypoint_table *knl_device_entrypoints =
2156 physical_device->instance->knl->device_entrypoints;
2157 if (knl_device_entrypoints) {
2158 vk_device_dispatch_table_from_entrypoints(
2159 &dispatch_table, knl_device_entrypoints, false);
2160 }
2161
2162 result = vk_device_init(&device->vk, &physical_device->vk,
2163 &dispatch_table, pCreateInfo, pAllocator);
2164 if (result != VK_SUCCESS) {
2165 vk_free(&device->vk.alloc, device);
2166 return vk_startup_errorf(physical_device->instance, result,
2167 "vk_device_init failed");
2168 }
2169
2170 device->instance = physical_device->instance;
2171 device->physical_device = physical_device;
2172 device->device_idx = device->physical_device->device_count++;
2173
2174 result = tu_drm_device_init(device);
2175 if (result != VK_SUCCESS) {
2176 vk_free(&device->vk.alloc, device);
2177 return result;
2178 }
2179
2180 device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2181 device->vk.check_status = tu_device_check_status;
2182
2183 mtx_init(&device->bo_mutex, mtx_plain);
2184 mtx_init(&device->pipeline_mutex, mtx_plain);
2185 mtx_init(&device->autotune_mutex, mtx_plain);
2186 mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2187 u_rwlock_init(&device->dma_bo_lock);
2188 pthread_mutex_init(&device->submit_mutex, NULL);
2189
2190 if (physical_device->has_set_iova) {
2191 mtx_init(&device->vma_mutex, mtx_plain);
2192 util_vma_heap_init(&device->vma, physical_device->va_start,
2193 ROUND_DOWN_TO(physical_device->va_size, 4096));
2194 }
2195
2196 if (TU_DEBUG(BOS))
2197 device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2198
2199 /* kgsl is not a drm device: */
2200 if (!is_kgsl(physical_device->instance))
2201 vk_device_set_drm_fd(&device->vk, device->fd);
2202
2203 struct tu6_global *global = NULL;
2204 uint32_t global_size = sizeof(struct tu6_global);
2205 struct vk_pipeline_cache_create_info pcc_info = { };
2206
2207 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2208 const VkDeviceQueueCreateInfo *queue_create =
2209 &pCreateInfo->pQueueCreateInfos[i];
2210 uint32_t qfi = queue_create->queueFamilyIndex;
2211 device->queues[qfi] = (struct tu_queue *) vk_alloc(
2212 &device->vk.alloc,
2213 queue_create->queueCount * sizeof(struct tu_queue), 8,
2214 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2215 if (!device->queues[qfi]) {
2216 result = vk_startup_errorf(physical_device->instance,
2217 VK_ERROR_OUT_OF_HOST_MEMORY,
2218 "OOM");
2219 goto fail_queues;
2220 }
2221
2222 memset(device->queues[qfi], 0,
2223 queue_create->queueCount * sizeof(struct tu_queue));
2224
2225 device->queue_count[qfi] = queue_create->queueCount;
2226
2227 for (unsigned q = 0; q < queue_create->queueCount; q++) {
2228 result = tu_queue_init(device, &device->queues[qfi][q], q,
2229 queue_create, global_priority_query);
2230 if (result != VK_SUCCESS) {
2231 device->queue_count[qfi] = q;
2232 goto fail_queues;
2233 }
2234 }
2235 }
2236
2237 {
2238 struct ir3_compiler_options ir3_options = {
2239 .robust_buffer_access2 = robust_buffer_access2,
2240 .push_ubo_with_preamble = true,
2241 .disable_cache = true,
2242 .bindless_fb_read_descriptor = -1,
2243 .bindless_fb_read_slot = -1,
2244 .storage_16bit = physical_device->info->a6xx.storage_16bit,
2245 .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2246 };
2247 device->compiler = ir3_compiler_create(
2248 NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2249 }
2250 if (!device->compiler) {
2251 result = vk_startup_errorf(physical_device->instance,
2252 VK_ERROR_INITIALIZATION_FAILED,
2253 "failed to initialize ir3 compiler");
2254 goto fail_queues;
2255 }
2256
2257 /* Initialize sparse array for refcounting imported BOs */
2258 util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2259
2260 if (physical_device->has_set_iova) {
2261 STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2262 if (!u_vector_init(&device->zombie_vmas, 64,
2263 sizeof(struct tu_zombie_vma))) {
2264 result = vk_startup_errorf(physical_device->instance,
2265 VK_ERROR_INITIALIZATION_FAILED,
2266 "zombie_vmas create failed");
2267 goto fail_free_zombie_vma;
2268 }
2269 }
2270
2271 /* initial sizes, these will increase if there is overflow */
2272 device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2273 device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2274
2275 if (custom_border_colors)
2276 global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2277
2278 tu_bo_suballocator_init(
2279 &device->pipeline_suballoc, device, 128 * 1024,
2280 (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP), "pipeline_suballoc");
2281 tu_bo_suballocator_init(&device->autotune_suballoc, device,
2282 128 * 1024, TU_BO_ALLOC_NO_FLAGS, "autotune_suballoc");
2283 if (is_kgsl(physical_device->instance)) {
2284 tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2285 128 * 1024, TU_BO_ALLOC_NO_FLAGS, "kgsl_profiling_suballoc");
2286 }
2287
2288 result = tu_bo_init_new(device, &device->global_bo, global_size,
2289 TU_BO_ALLOC_ALLOW_DUMP, "global");
2290 if (result != VK_SUCCESS) {
2291 vk_startup_errorf(device->instance, result, "BO init");
2292 goto fail_global_bo;
2293 }
2294
2295 result = tu_bo_map(device, device->global_bo);
2296 if (result != VK_SUCCESS) {
2297 vk_startup_errorf(device->instance, result, "BO map");
2298 goto fail_global_bo_map;
2299 }
2300
2301 global = (struct tu6_global *)device->global_bo->map;
2302 device->global_bo_map = global;
2303 tu_init_clear_blit_shaders(device);
2304
2305 result = tu_init_empty_shaders(device);
2306 if (result != VK_SUCCESS) {
2307 vk_startup_errorf(device->instance, result, "empty shaders");
2308 goto fail_empty_shaders;
2309 }
2310
2311 global->predicate = 0;
2312 global->vtx_stats_query_not_running = 1;
2313 global->dbg_one = (uint32_t)-1;
2314 global->dbg_gmem_total_loads = 0;
2315 global->dbg_gmem_taken_loads = 0;
2316 global->dbg_gmem_total_stores = 0;
2317 global->dbg_gmem_taken_stores = 0;
2318 for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2319 VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2320 tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2321 vk_border_color_is_int((VkBorderColor) i));
2322 }
2323
2324 /* initialize to ones so ffs can be used to find unused slots */
2325 BITSET_ONES(device->custom_border_color);
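/* A set bit means the slot is free, so a free slot can then be found as
 * e.g. (illustrative, assuming the BITSET_FFS helper):
 * slot = BITSET_FFS(device->custom_border_color) - 1.
 */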
2326
2327 result = tu_init_dynamic_rendering(device);
2328 if (result != VK_SUCCESS) {
2329 vk_startup_errorf(device->instance, result, "dynamic rendering");
2330 goto fail_dynamic_rendering;
2331 }
2332
2333 device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2334 NULL);
2335 if (!device->mem_cache) {
2336 result = VK_ERROR_OUT_OF_HOST_MEMORY;
2337 vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2338 goto fail_pipeline_cache;
2339 }
2340
2341 if (perf_query_pools) {
2342 /* Prepare command streams that set the pass index in the PERF_CNTRS_REG
2343 * scratch register, one per pass from 0 to 31. The right one is picked
2344 * up at command submit time when the perf query is executed.
2345 */
2346 struct tu_cs *cs;
2347
2348 if (!(device->perfcntrs_pass_cs =
2349 (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2350 result = vk_startup_errorf(device->instance,
2351 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2352 goto fail_perfcntrs_pass_alloc;
2353 }
2354
2355 device->perfcntrs_pass_cs_entries =
2356 (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2357 if (!device->perfcntrs_pass_cs_entries) {
2358 result = vk_startup_errorf(device->instance,
2359 VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2360 goto fail_perfcntrs_pass_entries_alloc;
2361 }
2362
2363 cs = device->perfcntrs_pass_cs;
2364 tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96, "perfcntrs cs");
2365
2366 for (unsigned i = 0; i < 32; i++) {
2367 struct tu_cs sub_cs;
2368
2369 result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
2370 if (result != VK_SUCCESS) {
2371 vk_startup_errorf(device->instance, result,
2372 "failed to allocate commands streams");
2373 goto fail_prepare_perfcntrs_pass_cs;
2374 }
2375
2376 tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2377 tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2378
2379 device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
2380 }
2381 }
2382
2383 if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2384 result = tu_init_cmdbuf_start_a725_quirk(device);
2385 if (result != VK_SUCCESS)
2386 goto fail_a725_workaround;
2387 }
2388
2389 tu_init_dbg_reg_stomper(device);
2390
2391 /* Initialize a condition variable for timeline semaphore */
2392 pthread_condattr_t condattr;
2393 if (pthread_condattr_init(&condattr) != 0) {
2394 result = vk_startup_errorf(physical_device->instance,
2395 VK_ERROR_INITIALIZATION_FAILED,
2396 "pthread condattr init");
2397 goto fail_timeline_cond;
2398 }
2399 if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2400 pthread_condattr_destroy(&condattr);
2401 result = vk_startup_errorf(physical_device->instance,
2402 VK_ERROR_INITIALIZATION_FAILED,
2403 "pthread condattr clock setup");
2404 goto fail_timeline_cond;
2405 }
2406 if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2407 pthread_condattr_destroy(&condattr);
2408 result = vk_startup_errorf(physical_device->instance,
2409 VK_ERROR_INITIALIZATION_FAILED,
2410 "pthread cond init");
2411 goto fail_timeline_cond;
2412 }
2413 pthread_condattr_destroy(&condattr);
2414
2415 result = tu_autotune_init(&device->autotune, device);
2416 if (result != VK_SUCCESS) {
2417 goto fail_timeline_cond;
2418 }
2419
2420 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2421 mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2422
2423 mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2424 mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2425
2426 mtx_init(&device->mutex, mtx_plain);
2427
2428 device->use_z24uint_s8uint =
2429 physical_device->info->a6xx.has_z24uint_s8uint &&
2430 !border_color_without_format;
2431 device->use_lrz =
2432 !TU_DEBUG(NOLRZ) && device->physical_device->info->chip == 6;
2433
2434 tu_gpu_tracepoint_config_variable();
2435
2436 device->submit_count = 0;
2437 u_trace_context_init(&device->trace_context, device,
2438 tu_trace_create_ts_buffer,
2439 tu_trace_destroy_ts_buffer,
2440 TU_CALLX(device, tu_trace_record_ts),
2441 tu_trace_read_ts,
2442 tu_trace_delete_flush_data);
2443
2444 tu_breadcrumbs_init(device);
2445
2446 if (FD_RD_DUMP(ENABLE)) {
2447 struct vk_app_info *app_info = &device->instance->vk.app_info;
2448 const char *app_name_str = app_info->app_name ?
2449 app_info->app_name : util_get_process_name();
2450 const char *engine_name_str = app_info->engine_name ?
2451 app_info->engine_name : "unknown-engine";
2452
2453 char app_name[64];
2454 snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2455
2456 char engine_name[32];
2457 snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2458
2459 char output_name[128];
2460 snprintf(output_name, sizeof(output_name), "tu_%s.%s_device%u",
2461 app_name, engine_name, device->device_idx);
2462
2463 fd_rd_output_init(&device->rd_output, output_name);
2464 }
2465
2466 *pDevice = tu_device_to_handle(device);
2467 return VK_SUCCESS;
2468
2469 fail_timeline_cond:
2470 if (device->cmdbuf_start_a725_quirk_entry) {
2471 free(device->cmdbuf_start_a725_quirk_entry);
2472 tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2473 free(device->cmdbuf_start_a725_quirk_cs);
2474 }
2475 fail_a725_workaround:
2476 fail_prepare_perfcntrs_pass_cs:
2477 free(device->perfcntrs_pass_cs_entries);
2478 tu_cs_finish(device->perfcntrs_pass_cs);
2479 fail_perfcntrs_pass_entries_alloc:
2480 free(device->perfcntrs_pass_cs);
2481 fail_perfcntrs_pass_alloc:
2482 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2483 fail_pipeline_cache:
2484 tu_destroy_dynamic_rendering(device);
2485 fail_dynamic_rendering:
2486 tu_destroy_empty_shaders(device);
2487 fail_empty_shaders:
2488 tu_destroy_clear_blit_shaders(device);
2489 fail_global_bo_map:
2490 tu_bo_finish(device, device->global_bo);
2491 vk_free(&device->vk.alloc, device->bo_list);
2492 fail_global_bo:
2493 ir3_compiler_destroy(device->compiler);
2494 util_sparse_array_finish(&device->bo_map);
2495 if (physical_device->has_set_iova)
2496 util_vma_heap_finish(&device->vma);
2497 fail_free_zombie_vma:
2498 u_vector_finish(&device->zombie_vmas);
2499 fail_queues:
2500 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2501 for (unsigned q = 0; q < device->queue_count[i]; q++)
2502 tu_queue_finish(&device->queues[i][q]);
2503 if (device->queues[i])
2504 vk_free(&device->vk.alloc, device->queues[i]);
2505 }
2506
2507 u_rwlock_destroy(&device->dma_bo_lock);
2508 vk_device_finish(&device->vk);
2509 vk_free(&device->vk.alloc, device);
2510 return result;
2511 }
2512
2513 VKAPI_ATTR void VKAPI_CALL
2514 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2515 {
2516 TU_FROM_HANDLE(tu_device, device, _device);
2517
2518 if (!device)
2519 return;
2520
2521 if (FD_RD_DUMP(ENABLE))
2522 fd_rd_output_fini(&device->rd_output);
2523
2524 tu_breadcrumbs_finish(device);
2525
2526 u_trace_context_fini(&device->trace_context);
2527
2528 for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2529 if (device->scratch_bos[i].initialized)
2530 tu_bo_finish(device, device->scratch_bos[i].bo);
2531 }
2532
2533 if (device->fiber_pvtmem_bo.bo)
2534 tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2535
2536 if (device->wave_pvtmem_bo.bo)
2537 tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2538
2539 tu_destroy_clear_blit_shaders(device);
2540
2541 tu_destroy_empty_shaders(device);
2542
2543 tu_destroy_dynamic_rendering(device);
2544
2545 ir3_compiler_destroy(device->compiler);
2546
2547 vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2548
2549 if (device->perfcntrs_pass_cs) {
2550 free(device->perfcntrs_pass_cs_entries);
2551 tu_cs_finish(device->perfcntrs_pass_cs);
2552 free(device->perfcntrs_pass_cs);
2553 }
2554
2555 if (device->dbg_cmdbuf_stomp_cs) {
2556 tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2557 free(device->dbg_cmdbuf_stomp_cs);
2558 }
2559
2560 if (device->dbg_renderpass_stomp_cs) {
2561 tu_cs_finish(device->dbg_renderpass_stomp_cs);
2562 free(device->dbg_renderpass_stomp_cs);
2563 }
2564
2565 if (device->cmdbuf_start_a725_quirk_entry) {
2566 free(device->cmdbuf_start_a725_quirk_entry);
2567 tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2568 free(device->cmdbuf_start_a725_quirk_cs);
2569 }
2570
2571 tu_autotune_fini(&device->autotune, device);
2572
2573 tu_bo_suballocator_finish(&device->pipeline_suballoc);
2574 tu_bo_suballocator_finish(&device->autotune_suballoc);
2575 tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2576
2577 tu_bo_finish(device, device->global_bo);
2578
2579 for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2580 for (unsigned q = 0; q < device->queue_count[i]; q++)
2581 tu_queue_finish(&device->queues[i][q]);
2582 if (device->queue_count[i])
2583 vk_free(&device->vk.alloc, device->queues[i]);
2584 }
2585
2586 tu_drm_device_finish(device);
2587
2588 if (device->physical_device->has_set_iova)
2589 util_vma_heap_finish(&device->vma);
2590
2591 util_sparse_array_finish(&device->bo_map);
2592 u_rwlock_destroy(&device->dma_bo_lock);
2593
2594 u_vector_finish(&device->zombie_vmas);
2595
2596 pthread_cond_destroy(&device->timeline_cond);
2597 _mesa_hash_table_destroy(device->bo_sizes, NULL);
2598 vk_free(&device->vk.alloc, device->bo_list);
2599 vk_device_finish(&device->vk);
2600 vk_free(&device->vk.alloc, device);
2601 }
2602
2603 VkResult
2604 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2605 {
2606 unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2607 unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2608 assert(index < ARRAY_SIZE(dev->scratch_bos));
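/* E.g. (illustrative): a 100KiB request rounds up to the 128KiB bucket,
 * i.e. size_log2 == 17 and index == 17 - MIN_SCRATCH_BO_SIZE_LOG2.
 */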
2609
2610 for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2611 if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2612 /* Fast path: just return the already-allocated BO. */
2613 *bo = dev->scratch_bos[i].bo;
2614 return VK_SUCCESS;
2615 }
2616 }
2617
2618 /* Slow path: actually allocate the BO. We take a per-size lock because
2619 * the allocation is slow, and we don't want to stall threads working on
2620 * other sizes while it finishes.
2621 */
2622 mtx_lock(&dev->scratch_bos[index].construct_mtx);
2623
2624 /* Another thread may have allocated it already while we were waiting on
2625 * the lock. We need to check this in order to avoid double-allocating.
2626 */
2627 if (dev->scratch_bos[index].initialized) {
2628 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2629 *bo = dev->scratch_bos[index].bo;
2630 return VK_SUCCESS;
2631 }
2632
2633 unsigned bo_size = 1ull << size_log2;
2634 VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
2635 TU_BO_ALLOC_NO_FLAGS, "scratch");
2636 if (result != VK_SUCCESS) {
2637 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2638 return result;
2639 }
2640
2641 p_atomic_set(&dev->scratch_bos[index].initialized, true);
2642
2643 mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2644
2645 *bo = dev->scratch_bos[index].bo;
2646 return VK_SUCCESS;
2647 }
2648
2649 VKAPI_ATTR VkResult VKAPI_CALL
2650 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2651 VkLayerProperties *pProperties)
2652 {
2653 *pPropertyCount = 0;
2654 return VK_SUCCESS;
2655 }
2656
2657 VKAPI_ATTR VkResult VKAPI_CALL
2658 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2659 uint32_t *pPropertyCount,
2660 VkExtensionProperties *pProperties)
2661 {
2662 if (pLayerName)
2663 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2664
2665 return vk_enumerate_instance_extension_properties(
2666 &tu_instance_extensions_supported, pPropertyCount, pProperties);
2667 }
2668
2669 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2670 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
2671 {
2672 TU_FROM_HANDLE(tu_instance, instance, _instance);
2673 return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
2674 &tu_instance_entrypoints,
2675 pName);
2676 }
2677
2678 /* The loader wants us to expose a second GetInstanceProcAddr function
2679 * to work around certain LD_PRELOAD issues seen in apps.
2680 */
2681 PUBLIC
2682 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
2683 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
2684 {
2685 return tu_GetInstanceProcAddr(instance, pName);
2686 }
2687
2688 VKAPI_ATTR VkResult VKAPI_CALL
2689 tu_AllocateMemory(VkDevice _device,
2690 const VkMemoryAllocateInfo *pAllocateInfo,
2691 const VkAllocationCallbacks *pAllocator,
2692 VkDeviceMemory *pMem)
2693 {
2694 TU_FROM_HANDLE(tu_device, device, _device);
2695 struct tu_device_memory *mem;
2696 VkResult result;
2697
2698 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2699
2700 if (pAllocateInfo->allocationSize == 0) {
2701 /* Apparently, this is allowed */
2702 *pMem = VK_NULL_HANDLE;
2703 return VK_SUCCESS;
2704 }
2705
2706 struct tu_memory_heap *mem_heap = &device->physical_device->heap;
2707 uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
2708 if (mem_heap_used > mem_heap->size)
2709 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2710
2711 mem = (struct tu_device_memory *) vk_object_alloc(
2712 &device->vk, pAllocator, sizeof(*mem), VK_OBJECT_TYPE_DEVICE_MEMORY);
2713 if (mem == NULL)
2714 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2715
2716 const VkImportMemoryFdInfoKHR *fd_info =
2717 vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
2718 if (fd_info && !fd_info->handleType)
2719 fd_info = NULL;
2720
2721 if (fd_info) {
2722 assert(fd_info->handleType ==
2723 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2724 fd_info->handleType ==
2725 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2726
2727 /*
2728 * TODO Importing the same fd twice gives us the same handle without
2729 * reference counting. We need to maintain a per-instance handle-to-bo
2730 * table and add a reference count to tu_bo.
2731 */
2732 result = tu_bo_init_dmabuf(device, &mem->bo,
2733 pAllocateInfo->allocationSize, fd_info->fd);
2734 if (result == VK_SUCCESS) {
2735 /* take ownership and close the fd */
2736 close(fd_info->fd);
2737 }
2738 } else {
2739 uint64_t client_address = 0;
2740 BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
2741
2742 const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
2743 vk_find_struct_const(pAllocateInfo->pNext,
2744 MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
2745 if (replay_info && replay_info->opaqueCaptureAddress) {
2746 client_address = replay_info->opaqueCaptureAddress;
2747 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2748 }
2749
2750 const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
2751 pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
2752 if (flags_info &&
2753 (flags_info->flags &
2754 VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
2755 alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
2756 }
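/* App-side sketch (illustrative) of the capture/replay path handled
 * above: on capture the address is obtained with
 * vkGetDeviceMemoryOpaqueCaptureAddress(), and on replay it is passed
 * back through VkMemoryOpaqueCaptureAddressAllocateInfo so the BO can be
 * placed at the same iova.
 */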
2757
2758 char name[64] = "vkAllocateMemory()";
2759 if (device->bo_sizes)
2760 snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
2761 (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
2762 VkMemoryPropertyFlags mem_property =
2763 device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
2764 result = tu_bo_init_new_explicit_iova(
2765 device, &mem->bo, pAllocateInfo->allocationSize, client_address,
2766 mem_property, alloc_flags, name);
2767 }
2768
2769 if (result == VK_SUCCESS) {
2770 mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
2771 if (mem_heap_used > mem_heap->size) {
2772 p_atomic_add(&mem_heap->used, -mem->bo->size);
2773 tu_bo_finish(device, mem->bo);
2774 result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
2775 "Out of heap memory");
2776 }
2777 }
2778
2779 if (result != VK_SUCCESS) {
2780 vk_object_free(&device->vk, pAllocator, mem);
2781 return result;
2782 }
2783
2784 /* Track in the device whether our BO list contains any implicit-sync BOs, so
2785 * we can suppress implicit sync on non-WSI usage.
2786 */
2787 const struct wsi_memory_allocate_info *wsi_info =
2788 vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
2789 if (wsi_info && wsi_info->implicit_sync) {
2790 mtx_lock(&device->bo_mutex);
2791 if (!mem->bo->implicit_sync) {
2792 mem->bo->implicit_sync = true;
2793 device->implicit_sync_bo_count++;
2794 }
2795 mtx_unlock(&device->bo_mutex);
2796 }
2797
2798 const VkMemoryDedicatedAllocateInfo *dedicate_info =
2799 vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
2800 if (dedicate_info) {
2801 mem->image = tu_image_from_handle(dedicate_info->image);
2802 } else {
2803 mem->image = NULL;
2804 }
2805
2806 *pMem = tu_device_memory_to_handle(mem);
2807
2808 return VK_SUCCESS;
2809 }
2810
2811 VKAPI_ATTR void VKAPI_CALL
2812 tu_FreeMemory(VkDevice _device,
2813 VkDeviceMemory _mem,
2814 const VkAllocationCallbacks *pAllocator)
2815 {
2816 TU_FROM_HANDLE(tu_device, device, _device);
2817 TU_FROM_HANDLE(tu_device_memory, mem, _mem);
2818
2819 if (mem == NULL)
2820 return;
2821
2822 p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
2823 tu_bo_finish(device, mem->bo);
2824 vk_object_free(&device->vk, pAllocator, mem);
2825 }
2826
2827 VKAPI_ATTR VkResult VKAPI_CALL
2828 tu_MapMemory(VkDevice _device,
2829 VkDeviceMemory _memory,
2830 VkDeviceSize offset,
2831 VkDeviceSize size,
2832 VkMemoryMapFlags flags,
2833 void **ppData)
2834 {
2835 TU_FROM_HANDLE(tu_device, device, _device);
2836 TU_FROM_HANDLE(tu_device_memory, mem, _memory);
2837 VkResult result;
2838
2839 if (mem == NULL) {
2840 *ppData = NULL;
2841 return VK_SUCCESS;
2842 }
2843
2844 if (!mem->bo->map) {
2845 result = tu_bo_map(device, mem->bo);
2846 if (result != VK_SUCCESS)
2847 return result;
2848 }
2849
2850 *ppData = (char *) mem->bo->map + offset;
2851 return VK_SUCCESS;
2852 }
2853
2854 VKAPI_ATTR void VKAPI_CALL
2855 tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2856 {
2857 /* TODO: unmap here instead of waiting for FreeMemory */
2858 }
2859
2860 static void
2861 tu_get_buffer_memory_requirements(struct tu_device *dev, uint64_t size,
2862 VkMemoryRequirements2 *pMemoryRequirements)
2863 {
2864 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2865 .size = MAX2(align64(size, 64), size),
2866 .alignment = 64,
2867 .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
2868 };
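/* E.g. (illustrative): with type_count == 3, memoryTypeBits is 0b111,
 * meaning any of the three types (all on heap 0) can back the buffer.
 */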
2869
2870 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2871 switch (ext->sType) {
2872 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2873 VkMemoryDedicatedRequirements *req =
2874 (VkMemoryDedicatedRequirements *) ext;
2875 req->requiresDedicatedAllocation = false;
2876 req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
2877 break;
2878 }
2879 default:
2880 break;
2881 }
2882 }
2883 }
2884
2885 VKAPI_ATTR void VKAPI_CALL
2886 tu_GetBufferMemoryRequirements2(
2887 VkDevice _device,
2888 const VkBufferMemoryRequirementsInfo2 *pInfo,
2889 VkMemoryRequirements2 *pMemoryRequirements)
2890 {
2891 TU_FROM_HANDLE(tu_device, device, _device);
2892 TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);
2893
2894 tu_get_buffer_memory_requirements(device, buffer->vk.size, pMemoryRequirements);
2895 }
2896
2897 VKAPI_ATTR void VKAPI_CALL
2898 tu_GetDeviceBufferMemoryRequirements(
2899 VkDevice _device,
2900 const VkDeviceBufferMemoryRequirements *pInfo,
2901 VkMemoryRequirements2 *pMemoryRequirements)
2902 {
2903 TU_FROM_HANDLE(tu_device, device, _device);
2904 tu_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pMemoryRequirements);
2905 }
2906
2907 VKAPI_ATTR void VKAPI_CALL
2908 tu_GetDeviceMemoryCommitment(VkDevice device,
2909 VkDeviceMemory memory,
2910 VkDeviceSize *pCommittedMemoryInBytes)
2911 {
2912 *pCommittedMemoryInBytes = 0;
2913 }
2914
2915 VKAPI_ATTR VkResult VKAPI_CALL
2916 tu_BindBufferMemory2(VkDevice device,
2917 uint32_t bindInfoCount,
2918 const VkBindBufferMemoryInfo *pBindInfos)
2919 {
2920 TU_FROM_HANDLE(tu_device, dev, device);
2921
2922 for (uint32_t i = 0; i < bindInfoCount; ++i) {
2923 TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
2924 TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);
2925
2926 if (mem) {
2927 buffer->bo = mem->bo;
2928 buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
2929 if (buffer->vk.usage &
2930 (VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
2931 VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT))
2932 tu_bo_allow_dump(dev, mem->bo);
2933 } else {
2934 buffer->bo = NULL;
2935 }
2936 }
2937 return VK_SUCCESS;
2938 }
2939
VKAPI_ATTR VkResult VKAPI_CALL
tu_BindImageMemory2(VkDevice _device,
                    uint32_t bindInfoCount,
                    const VkBindImageMemoryInfo *pBindInfos)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
      TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);

      if (mem) {
         image->bo = mem->bo;
         image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;

         if (image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
            /* Keep a CPU mapping of fragment density maps, presumably so
             * the density data can be read on the host when configuring
             * tiling.
             */
            if (!mem->bo->map) {
               VkResult result = tu_bo_map(device, mem->bo);
               if (result != VK_SUCCESS)
                  return result;
            }

            image->map = (char *)mem->bo->map + pBindInfos[i].memoryOffset;
         } else {
            image->map = NULL;
         }
      } else {
         image->bo = NULL;
         image->map = NULL;
         image->iova = 0;
      }
   }

   return VK_SUCCESS;
}

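/* Stub: sparse resources are not supported, so there is never anything to
 * bind here.
 */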
VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueBindSparse(VkQueue _queue,
                   uint32_t bindInfoCount,
                   const VkBindSparseInfo *pBindInfo,
                   VkFence _fence)
{
   return VK_SUCCESS;
}

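/* Events are backed by a small dedicated BO holding a single 64-bit word:
 * 1 means set, 0 means reset. The host-side entry points below read and
 * write the mapped word directly; vkCmdSetEvent/vkCmdResetEvent write the
 * same word from the command stream.
 */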
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateEvent(VkDevice _device,
               const VkEventCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *pAllocator,
               VkEvent *pEvent)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct tu_event *event = (struct tu_event *)
      vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
                      VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
                                    TU_BO_ALLOC_NO_FLAGS, "event");
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = tu_bo_map(device, event->bo);
   if (result != VK_SUCCESS)
      goto fail_map;

   *pEvent = tu_event_to_handle(event);

   return VK_SUCCESS;

fail_map:
   tu_bo_finish(device, event->bo);
fail_alloc:
   vk_object_free(&device->vk, pAllocator, event);
   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyEvent(VkDevice _device,
                VkEvent _event,
                const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_event, event, _event);

   if (!event)
      return;

   tu_bo_finish(device, event->bo);
   vk_object_free(&device->vk, pAllocator, event);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_event, event, _event);

   if (vk_device_is_lost(&device->vk))
      return VK_ERROR_DEVICE_LOST;

   if (*(uint64_t*) event->bo->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 1;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 0;

   return VK_SUCCESS;
}

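/* Creating a buffer allocates only the host-side object; the backing memory
 * and GPU address are attached later in tu_BindBufferMemory2().
 */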
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBuffer(VkDevice _device,
                const VkBufferCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkBuffer *pBuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_buffer *buffer;

   buffer = (struct tu_buffer *) vk_buffer_create(
      &device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   *pBuffer = tu_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyBuffer(VkDevice _device,
                 VkBuffer _buffer,
                 const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

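/* With the dynamic-rendering debug option (TU_DEBUG=dynamic), render passes
 * are translated to dynamic rendering, so framebuffers are handled by the
 * common Vulkan runtime instead of this path.
 */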
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,
                     const VkFramebufferCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkFramebuffer *pFramebuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC))
      return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
                                         pFramebuffer);

   TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
   struct tu_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;

   /* Imageless framebuffers carry no attachment array; their attachments
    * are provided when the render pass begins instead.
    */
   size_t size = sizeof(*framebuffer);
   if (!imageless)
      size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
   framebuffer = (struct tu_framebuffer *) vk_object_alloc(
      &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!imageless) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct tu_image_view *iview = tu_image_view_from_handle(_iview);
         framebuffer->attachments[i].attachment = iview;
      }
   }

   tu_framebuffer_tiling_config(framebuffer, device, pass);

   *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *pRenderingInfo)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;

   framebuffer->attachment_count = pass->attachment_count;
   /* Size the framebuffer to the far edge of the render area, so the
    * tiling configuration covers everything being rendered.
    */
   framebuffer->width = pRenderingInfo->renderArea.offset.x +
      pRenderingInfo->renderArea.extent.width;
   framebuffer->height = pRenderingInfo->renderArea.offset.y +
      pRenderingInfo->renderArea.extent.height;
   framebuffer->layers = pRenderingInfo->layerCount;

   tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,
                      VkFramebuffer _fb,
                      const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC)) {
      vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
      return;
   }

   TU_FROM_HANDLE(tu_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

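/* Packs a VkSamplerCreateInfo into the four-dword a6xx TEX_SAMP descriptor.
 * Custom border colors are allocated from a per-device bitset, and the
 * color itself is written into the global border-color buffer just past the
 * builtin Vulkan colors.
 */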
static void
tu_init_sampler(struct tu_device *device,
                struct tu_sampler *sampler,
                const VkSamplerCreateInfo *pCreateInfo)
{
   const struct VkSamplerReductionModeCreateInfo *reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
   const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
   /* For non-custom border colors, the VK enum is translated directly to an
    * offset in the border color buffer. Custom border colors are located
    * immediately after the builtin colors, so an offset of
    * TU_BORDER_COLOR_BUILTIN is added.
    */
   uint32_t border_color = (unsigned) pCreateInfo->borderColor;
   if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
       pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
      mtx_lock(&device->mutex);
      border_color = BITSET_FFS(device->custom_border_color) - 1;
      assert(border_color < TU_BORDER_COLOR_COUNT);
      BITSET_CLEAR(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);

      VkClearColorValue color = custom_border_color->customBorderColor;
      if (custom_border_color->format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT &&
          device->use_z24uint_s8uint) {
         /* When sampling stencil using the special Z24UINT_S8UINT format,
          * the border color is in the second component. Note: if
          * customBorderColorWithoutFormat is enabled, we may miss doing
          * this here if the format isn't specified, which is why we don't
          * use that feature.
          */
         color.uint32[1] = color.uint32[0];
      }

      tu6_pack_border_color(
         &device->global_bo_map->bcolor[border_color], &color,
         pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
      border_color += TU_BORDER_COLOR_BUILTIN;
   }

   unsigned aniso = pCreateInfo->anisotropyEnable ?
      util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
   bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
   float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
   float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);

   sampler->descriptor[0] =
      COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
      A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
      A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
      A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso) aniso) |
      A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
      A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
      A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
      A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
   sampler->descriptor[1] =
      COND(pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT,
           A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
      COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
      A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
      A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
      COND(pCreateInfo->compareEnable,
           A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
   sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
   sampler->descriptor[3] = 0;

   if (reduction) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
         tu6_reduction_mode(reduction->reductionMode));
   }

   sampler->ycbcr_sampler = ycbcr_conversion ?
      tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;

   if (sampler->ycbcr_sampler &&
       sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
   }

   /* TODO:
    * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has
    * no NONE mipfilter?
    */
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSampler(VkDevice _device,
                 const VkSamplerCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkSampler *pSampler)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = (struct tu_sampler *) vk_object_alloc(
      &device->vk, pAllocator, sizeof(*sampler), VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   tu_init_sampler(device, sampler, pCreateInfo);
   *pSampler = tu_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroySampler(VkDevice _device,
                  VkSampler _sampler,
                  const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
   uint32_t border_color;

   if (!sampler)
      return;

   border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >>
      A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
   if (border_color >= TU_BORDER_COLOR_BUILTIN) {
      border_color -= TU_BORDER_COLOR_BUILTIN;
      /* If the sampler had a custom border color, free it. TODO: no lock */
      mtx_lock(&device->mutex);
      assert(!BITSET_TEST(device->custom_border_color, border_color));
      BITSET_SET(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

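/* Exports the backing BO as an fd. For image-backed allocations, the image
 * layout is translated into a DRM format modifier and attached to the BO as
 * metadata so that importers can reconstruct the tiling.
 */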
VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,
                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
                  int *pFd)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
   if (prime_fd < 0)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   *pFd = prime_fd;

   if (memory->image) {
      struct fdl_layout *l = &memory->image->layout[0];
      uint64_t modifier;
      if (l->ubwc) {
         modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
      } else if (l->tile_mode == 2) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED2;
      } else if (l->tile_mode == 3) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED3;
      } else {
         assert(!l->tile_mode);
         modifier = DRM_FORMAT_MOD_LINEAR;
      }
      struct fdl_metadata metadata = {
         .modifier = modifier,
      };
      tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                            VkExternalMemoryHandleTypeFlagBits handleType,
                            int fd,
                            VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
   pMemoryFdProperties->memoryTypeBits =
      (1 << device->physical_device->memory.type_count) - 1;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice physicalDevice,
   VkSampleCountFlagBits samples,
   VkMultisamplePropertiesEXT* pMultisampleProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   if (samples <= VK_SAMPLE_COUNT_4_BIT &&
       pdevice->vk.supported_extensions.EXT_sample_locations)
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
   else
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
}

VkDeviceAddress
tu_GetBufferDeviceAddress(VkDevice _device,
                          const VkBufferDeviceAddressInfo* pInfo)
{
   TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

   return buffer->iova;
}

uint64_t tu_GetBufferOpaqueCaptureAddress(
   VkDevice device,
   const VkBufferDeviceAddressInfo* pInfo)
{
   /* We care only about memory allocation opaque addresses */
   return 0;
}

uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
   VkDevice device,
   const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   TU_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
   return mem->bo->iova;
}

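/* Optional per-name BO accounting, active only when dev->bo_sizes is set up
 * (enabled via a TU_DEBUG option); each entry tracks how many live BOs
 * share a creation name and their total page-aligned size.
 */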
struct tu_debug_bos_entry {
   uint32_t count;
   uint64_t size;
   const char *name;
};

const char *
tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
{
   assert(name);

   if (likely(!dev->bo_sizes))
      return NULL;

   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
   struct tu_debug_bos_entry *debug_bos;

   if (!entry) {
      debug_bos = (struct tu_debug_bos_entry *) calloc(
         1, sizeof(struct tu_debug_bos_entry));
      debug_bos->name = strdup(name);
      _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
   } else {
      debug_bos = (struct tu_debug_bos_entry *) entry->data;
   }

   debug_bos->count++;
   debug_bos->size += align(size, 4096);
   mtx_unlock(&dev->bo_mutex);

   return debug_bos->name;
}

void
tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
{
   if (likely(!dev->bo_sizes) || !bo->name)
      return;

   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(dev->bo_sizes, bo->name);
   /* If we're finishing the BO, it should have been added already */
   assert(entry);

   struct tu_debug_bos_entry *debug_bos =
      (struct tu_debug_bos_entry *) entry->data;
   debug_bos->count--;
   debug_bos->size -= align(bo->size, 4096);
   if (!debug_bos->count) {
      _mesa_hash_table_remove(dev->bo_sizes, entry);
      free((void *) debug_bos->name);
      free(debug_bos);
   }
   mtx_unlock(&dev->bo_mutex);
}

/* Sorts ascending by allocation count, so the most common names print last. */
static int debug_bos_count_compare(const void *in_a, const void *in_b)
{
   struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
   struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
   return a->count - b->count;
}

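/* Logs one line per BO name plus a total, per the mesa_logi() format
 * strings below. Illustrative output (hypothetical values):
 *
 *                        cmdstream:   40 bos, 2048 kb
 *    submitted 123 bos (12 MB)
 */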
void
tu_debug_bos_print_stats(struct tu_device *dev)
{
   if (likely(!dev->bo_sizes))
      return;

   mtx_lock(&dev->bo_mutex);

   /* Put the HT's sizes data in an array so we can sort by number of
    * allocations.
    */
   struct util_dynarray dyn;
   util_dynarray_init(&dyn, NULL);

   uint32_t size = 0;
   uint32_t count = 0;
   hash_table_foreach(dev->bo_sizes, entry)
   {
      struct tu_debug_bos_entry *debug_bos =
         (struct tu_debug_bos_entry *) entry->data;
      util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
      size += debug_bos->size / 1024;
      count += debug_bos->count;
   }

   qsort(dyn.data,
         util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
         sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);

   util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
   {
      struct tu_debug_bos_entry *debug_bos = *entryp;
      mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
                (long long) (debug_bos->size / 1024));
   }

   mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));

   util_dynarray_fini(&dyn);

   mtx_unlock(&dev->bo_mutex);
}

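/* Debug-utils labels are forwarded to u_trace so they show up as annotation
 * ranges in perfetto, emitted into the draw CS while inside a render pass
 * and into the main CS otherwise.
 */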
void
tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
                              const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);

   /* Note that the spec says:
    *
    *    "An application may open a debug label region in one command buffer
    *    and close it in another, or otherwise split debug label regions
    *    across multiple command buffers or multiple queue submissions. When
    *    viewed from the linear series of submissions to a single queue, the
    *    calls to vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT
    *    must be matched and balanced."
    *
    * But if you're beginning labeling during a renderpass and ending outside
    * it, or vice versa, these trace ranges in perfetto will be unbalanced. I
    * expect that u_trace and perfetto will do something like take just one
    * of the begins/ends, or drop the event entirely, but not crash.
    * Similarly, I think we'll have problems if the tracepoints are split
    * across cmd buffers. Still, getting the simple case of cmd buffer
    * annotation into perfetto should prove useful.
    */
   const char *label = pLabelInfo->pLabelName;
   if (cmd_buffer->state.pass) {
      trace_start_cmd_buffer_annotation_rp(
         &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
   } else {
      trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
                                        strlen(label), label);
   }
}

void
tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   /* Only close an annotation range if one is actually open; an unmatched
    * end (e.g. a label begun in another command buffer) emits no tracepoint.
    */
   if (cmd_buffer->vk.labels.size > 0) {
      if (cmd_buffer->state.pass) {
         trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
                                            &cmd_buffer->draw_cs);
      } else {
         trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
      }
   }

   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
}
