/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_android.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_queue.h"
#include "tu_query_pool.h"
#include "tu_rmv.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if DETECT_OS_ANDROID
#include "util/u_gralloc/u_gralloc.h"
#include <vndk/hardware_buffer.h>
#endif

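/* Fallback page size; tu_get_properties() below refreshes this via
 * os_get_page_size() before reporting minPlacedMemoryMapAlignment for
 * VK_EXT_map_memory_placed. */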
uint64_t os_page_size = 4096;
52 
53 static int
tu_device_get_cache_uuid(struct tu_physical_device * device,void * uuid)54 tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
55 {
56    struct mesa_sha1 ctx;
57    unsigned char sha1[20];
58    /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
59     * initialized until after compiler creation so we have to add it to the
60     * shader hash instead, since the compiler is only created with the logical
61     * device.
62     */
63    uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
64    uint16_t family = fd_dev_gpu_id(&device->dev_id);
65 
66    memset(uuid, 0, VK_UUID_SIZE);
67    _mesa_sha1_init(&ctx);
68 
69    if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
70       return -1;
71 
72    _mesa_sha1_update(&ctx, &family, sizeof(family));
73    _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
74    _mesa_sha1_final(&ctx, sha1);
75 
76    memcpy(uuid, sha1, VK_UUID_SIZE);
77    return 0;
78 }
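
/* tu_get_properties() reports this hash as pipelineCacheUUID, so a new
 * build id, a different GPU family, or a change to the debug flags hashed
 * above all invalidate previously cached pipelines. */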
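/* VK_MAKE_VERSION packs (major << 22) | (minor << 12) | patch, so this
 * advertises Vulkan 1.4 at the current header's patch revision. */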
#define TU_API_VERSION VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
    *pApiVersion = TU_API_VERSION;
    return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation           = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                         = true,
#endif
   .KHR_external_fence_capabilities     = true,
   .KHR_external_memory_capabilities    = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2         = true,
#endif
   .KHR_get_physical_device_properties2 = true,
#ifdef TU_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2       = true,
   .KHR_surface                         = true,
   .KHR_surface_protected_capabilities  = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                 = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                     = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                    = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display             = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display            = true,
#endif
   .EXT_debug_report                    = true,
   .EXT_debug_utils                     = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display             = true,
   .EXT_display_surface_counter         = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                = true,
#endif
#ifdef TU_USE_WSI_PLATFORM
   .EXT_surface_maintenance1            = true,
   .EXT_swapchain_colorspace            = true,
#endif
} };

static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_8bit_storage = device->info->a7xx.storage_8bit,
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .KHR_compute_shader_derivatives = device->info->chip >= 7,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
#ifdef TU_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extensions that way. */
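      /* A sketch of the opt-in (the option name comes from the queries just
       * below): an application profile in drirc would set
       *   <option name="vk_khr_present_wait" value="true"/>
       */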
      .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                         wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
      .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                           wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
#endif
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = true,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_image_copy = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
#ifdef TU_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .NV_compute_shader_derivatives = device->info->chip >= 7,
      .VALVE_mutable_descriptor_type = true,
   } };

#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = true;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess  = false;
   features->storagePushConstant16               = false;
   features->storageInputOutput16                = false;
   features->multiview                           = true;
   features->multiviewGeometryShader             = false;
   features->multiviewTessellationShader         = false;
   features->variablePointersStorageBuffer       = true;
   features->variablePointers                    = true;
   features->protectedMemory                     = false;
   features->samplerYcbcrConversion              = true;
   features->shaderDrawParameters                = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge            = true;
   features->drawIndirectCount                   = true;
   features->storageBuffer8BitAccess             = pdevice->info->a7xx.storage_8bit;
   features->uniformAndStorageBuffer8BitAccess   = false;
   features->storagePushConstant8                = false;
   features->shaderBufferInt64Atomics =
      pdevice->info->a7xx.has_64b_ssbo_atomics;
   features->shaderSharedInt64Atomics            = false;
   features->shaderFloat16                       = true;
   features->shaderInt8                          = true;

   features->descriptorIndexing                                 = true;
   features->shaderInputAttachmentArrayDynamicIndexing          = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   features->shaderUniformBufferArrayNonUniformIndexing         = true;
   features->shaderSampledImageArrayNonUniformIndexing          = true;
   features->shaderStorageBufferArrayNonUniformIndexing         = true;
   features->shaderStorageImageArrayNonUniformIndexing          = true;
   features->shaderInputAttachmentArrayNonUniformIndexing       = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   features->descriptorBindingUniformBufferUpdateAfterBind      = true;
   features->descriptorBindingSampledImageUpdateAfterBind       = true;
   features->descriptorBindingStorageImageUpdateAfterBind       = true;
   features->descriptorBindingStorageBufferUpdateAfterBind      = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending          = true;
   features->descriptorBindingPartiallyBound                    = true;
   features->descriptorBindingVariableDescriptorCount           = true;
   features->runtimeDescriptorArray                             = true;

   features->samplerFilterMinmax                 =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout                   = true;
   features->imagelessFramebuffer                = true;
   features->uniformBufferStandardLayout         = true;
   features->shaderSubgroupExtendedTypes         = true;
   features->separateDepthStencilLayouts         = true;
   features->hostQueryReset                      = true;
   features->timelineSemaphore                   = true;
   features->bufferDeviceAddress                 = true;
   features->bufferDeviceAddressCaptureReplay    = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice      = false;
   features->vulkanMemoryModel                   = true;
   features->vulkanMemoryModelDeviceScope        = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex           = true;
   features->shaderOutputLayer                   = true;
   features->subgroupBroadcastDynamicId          = true;

   /* Vulkan 1.3 */
   features->robustImageAccess                   = true;
   features->inlineUniformBlock                  = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl        = true;
   features->privateData                         = true;
   features->shaderDemoteToHelperInvocation      = true;
   features->shaderTerminateInvocation           = true;
   features->subgroupSizeControl                 = true;
   features->computeFullSubgroups                = true;
   features->synchronization2                    = true;
   features->textureCompressionASTC_HDR          = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering                    = true;
   features->shaderIntegerDotProduct             = true;
   features->maintenance4                        = true;

   /* Vulkan 1.4 */
   features->pushDescriptor = true;

   /* VK_KHR_compute_shader_derivatives */
   features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
   features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;

   /* VK_KHR_dynamic_rendering_local_read */
   features->dynamicRenderingLocalRead = true;

   /* VK_KHR_fragment_shading_rate */
   features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
   features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
   features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;

   /* VK_KHR_index_type_uint8 */
   features->indexTypeUint8 = true;

   /* VK_KHR_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_maintenance6 */
   features->maintenance6 = true;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;

   /* VK_KHR_shader_float_controls2 */
   features->shaderFloatControls2 = true;

   /* VK_KHR_shader_subgroup_uniform_control_flow */
   features->shaderSubgroupUniformControlFlow = true;

   /* VK_KHR_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_KHR_workgroup_memory_explicit_layout */
   features->workgroupMemoryExplicitLayout = true;
   features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
   features->workgroupMemoryExplicitLayout8BitAccess = true;
   features->workgroupMemoryExplicitLayout16BitAccess = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_attachment_feedback_loop_dynamic_state */
   features->attachmentFeedbackLoopDynamicState = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_depth_clamp_zero_one */
   features->depthClampZeroOne = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_device_address_binding_report */
   features->reportAddressBinding = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode = false;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable =
      pdevice->info->a6xx.has_sample_locations;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_EXT_host_image_copy */
   features->hostImageCopy = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_legacy_vertex_attributes */
   features->legacyVertexAttributes = true;

   /* VK_EXT_legacy_dithering */
   features->legacyDithering = true;

   /* VK_EXT_map_memory_placed */
   features->memoryMapPlaced = true;
   features->memoryMapRangePlaced = false;
   features->memoryUnmapReserve = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_nested_command_buffer */
   features->nestedCommandBuffer = true;
   features->nestedCommandBufferRendering = true;
   features->nestedCommandBufferSimultaneousUse = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_pipeline_robustness */
   features->pipelineRobustness = true;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_shader_replicated_composites */
   features->shaderReplicatedComposites = true;

#ifdef TU_USE_WSI_PLATFORM
   /* VK_EXT_swapchain_maintenance1 */
   features->swapchainMaintenance1 = true;
#endif

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_KHR_shader_relaxed_extended_instruction */
   features->shaderRelaxedExtendedInstruction = true;

   /* VK_KHR_shader_subgroup_rotate */
   features->shaderSubgroupRotate = true;
   features->shaderSubgroupRotateClustered = true;
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
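   /* With threadsize_base == 64 (the typical wave size on these GPUs,
    * assumed here only for illustration) this reports a subgroup size of
    * 128 when shaders can run at double threadsize, and 64 otherwise. */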
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   if (pdevice->info->chip >= 7) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 4,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 2,
         .subminor = 7,
         .patch = 1,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16         = true;
   p->shaderDenormPreserveFloat16            = false;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = false;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = false;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = false;
   p->shaderDenormPreserveFloat64            = false;
   p->shaderRoundingModeRTEFloat64           = false;
   p->shaderRoundingModeRTZFloat64           = false;
   p->shaderSignedZeroInfNanPreserveFloat64  = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
   p->shaderSampledImageArrayNonUniformIndexingNative    = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = false;
   p->quadDivergentImplicitLod                           = false;

   p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_RTS;

   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone  = false;
   p->independentResolve      = false;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->minSubgroupSize = pdevice->info->threadsize_base;
   p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

/* CP_ALWAYS_ON_COUNTER runs at a fixed 19.2 MHz */
#define ALWAYS_ON_FREQUENCY 19200000
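/* One always-on counter tick is therefore 1e9 / 19200000 ≈ 52.08 ns, which
 * is what timestampPeriod reports below. */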

static void
tu_get_properties(struct tu_physical_device *pdevice,
                  struct vk_properties *props)
{
   /* Limits */
   props->maxImageDimension1D = (1 << 14);
   props->maxImageDimension2D = (1 << 14);
   props->maxImageDimension3D = (1 << 11);
   props->maxImageDimensionCube = (1 << 14);
   props->maxImageArrayLayers = (1 << 11);
   props->maxTexelBufferElements = 128 * 1024 * 1024;
   props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
   props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
   props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
   props->maxMemoryAllocationCount = UINT32_MAX;
   props->maxSamplerAllocationCount = 64 * 1024;
   props->bufferImageGranularity = 64;          /* A cache line */
   props->sparseAddressSpaceSize = 0;
   props->maxBoundDescriptorSets = pdevice->usable_sets;
   props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
   props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
   props->maxPerStageDescriptorInputAttachments = MAX_RTS;
   props->maxPerStageResources = max_descriptor_set_size;
   props->maxDescriptorSetSamplers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
   props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   props->maxDescriptorSetSampledImages = max_descriptor_set_size;
   props->maxDescriptorSetStorageImages = max_descriptor_set_size;
   props->maxDescriptorSetInputAttachments = MAX_RTS;
   props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputAttributeOffset = 4095;
   props->maxVertexInputBindingStride = 2048;
   props->maxVertexOutputComponents = 128;
   props->maxTessellationGenerationLevel = 64;
   props->maxTessellationPatchSize = 32;
   props->maxTessellationControlPerVertexInputComponents = 128;
   props->maxTessellationControlPerVertexOutputComponents = 128;
   props->maxTessellationControlPerPatchOutputComponents = 120;
   props->maxTessellationControlTotalOutputComponents = 4096;
   props->maxTessellationEvaluationInputComponents = 128;
   props->maxTessellationEvaluationOutputComponents = 128;
   props->maxGeometryShaderInvocations = 32;
   props->maxGeometryInputComponents = 64;
   props->maxGeometryOutputComponents = 128;
   props->maxGeometryOutputVertices = 256;
   props->maxGeometryTotalOutputComponents = 1024;
   props->maxFragmentInputComponents = 124;
   props->maxFragmentOutputAttachments = 8;
   props->maxFragmentDualSrcAttachments = 1;
   props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
   props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
   props->maxComputeWorkGroupCount[0] =
      props->maxComputeWorkGroupCount[1] =
      props->maxComputeWorkGroupCount[2] = 65535;
   props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
      pdevice->info->threadsize_base * pdevice->info->max_waves;
   props->maxComputeWorkGroupSize[0] =
      props->maxComputeWorkGroupSize[1] =
      props->maxComputeWorkGroupSize[2] = 1024;
   props->subPixelPrecisionBits = 8;
   props->subTexelPrecisionBits = 8;
   props->mipmapPrecisionBits = 8;
   props->maxDrawIndexedIndexValue = UINT32_MAX;
   props->maxDrawIndirectCount = UINT32_MAX;
   props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
   props->maxSamplerAnisotropy = 16;
   props->maxViewports =
         (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   props->maxViewportDimensions[0] =
      props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
   props->viewportBoundsRange[0] = INT16_MIN;
   props->viewportBoundsRange[1] = INT16_MAX;
   props->viewportSubPixelBits = 8;
   props->minMemoryMapAlignment = 4096; /* A page */
   props->minTexelBufferOffsetAlignment = 64;
   props->minUniformBufferOffsetAlignment = 64;
   props->minStorageBufferOffsetAlignment = 4;
   props->minTexelOffset = -16;
   props->maxTexelOffset = 15;
   props->minTexelGatherOffset = -32;
   props->maxTexelGatherOffset = 31;
   props->minInterpolationOffset = -0.5;
   props->maxInterpolationOffset = 0.4375;
   props->subPixelInterpolationOffsetBits = 4;
   props->maxFramebufferWidth = (1 << 14);
   props->maxFramebufferHeight = (1 << 14);
   props->maxFramebufferLayers = (1 << 10);
   props->framebufferColorSampleCounts = sample_counts;
   props->framebufferDepthSampleCounts = sample_counts;
   props->framebufferStencilSampleCounts = sample_counts;
   props->framebufferNoAttachmentsSampleCounts = sample_counts;
   props->maxColorAttachments = MAX_RTS;
   props->sampledImageColorSampleCounts = sample_counts;
   props->sampledImageIntegerSampleCounts = sample_counts;
   props->sampledImageDepthSampleCounts = sample_counts;
   props->sampledImageStencilSampleCounts = sample_counts;
   props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
   props->maxSampleMaskWords = 1;
   props->timestampComputeAndGraphics = true;
   props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY;
   props->maxClipDistances = 8;
   props->maxCullDistances = 8;
   props->maxCombinedClipAndCullDistances = 8;
   props->discreteQueuePriorities = 2;
   props->pointSizeRange[0] = 1;
   props->pointSizeRange[1] = 4092;
   props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
   props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
   props->pointSizeGranularity = 0.0625;
   props->lineWidthGranularity =
      pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
   props->strictLines = true;
   props->standardSampleLocations = true;
   props->optimalBufferCopyOffsetAlignment = 128;
   props->optimalBufferCopyRowPitchAlignment = 128;
   props->nonCoherentAtomSize = 64;

   props->apiVersion =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
         ((pdevice->info->chip >= 7) ? TU_API_VERSION :
            VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION))
         : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
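   /* i.e. Vulkan 1.4 on a7xx and 1.3 on a6xx when HW multiview is present
    * (or conformance checks are bypassed via TU_DEBUG(NOCONFORM)), and only
    * Vulkan 1.0 otherwise. */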
   props->driverVersion = vk_get_driver_version();
   props->vendorID = 0x5143;
   props->deviceID = pdevice->dev_id.chip_id;
   props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;

   /* Vulkan 1.4 */
   props->dynamicRenderingLocalReadDepthStencilAttachments = true;
   props->dynamicRenderingLocalReadMultisampledAttachments = true;

   /* sparse properties */
   props->sparseResidencyStandard2DBlockShape = false;
   props->sparseResidencyStandard2DMultisampleBlockShape = false;
   props->sparseResidencyStandard3DBlockShape = false;
   props->sparseResidencyAlignedMipSize = false;
   props->sparseResidencyNonResidentStrict = false;

   strcpy(props->deviceName, pdevice->name);
   memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   tu_get_physical_device_properties_1_1(pdevice, props);
   tu_get_physical_device_properties_1_2(pdevice, props);
   tu_get_physical_device_properties_1_3(pdevice, props);

   /* VK_KHR_compute_shader_derivatives */
   props->meshAndTaskShaderDerivatives = false;

   /* VK_KHR_fragment_shading_rate */
   if (pdevice->info->a6xx.has_attachment_shading_rate) {
      props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
      props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
   } else {
      props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
      props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
   }
   props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
   props->primitiveFragmentShadingRateWithMultipleViewports =
      pdevice->info->a7xx.has_primitive_shading_rate;
   /* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
    * for some reason.
    */
   props->layeredShadingRateAttachments = false;
   props->fragmentShadingRateNonTrivialCombinerOps = true;
   props->maxFragmentSize = {4, 4};
   props->maxFragmentSizeAspectRatio = 4;
   props->maxFragmentShadingRateCoverageSamples = 16;
   props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
   props->fragmentShadingRateWithShaderDepthStencilWrites = true;
   props->fragmentShadingRateWithSampleMask = true;
   /* gl_SampleMaskIn[0] has wrong values when VK_EXT_post_depth_coverage is
    * used. */
   props->fragmentShadingRateWithShaderSampleMask = false;
   props->fragmentShadingRateWithConservativeRasterization = false;
   props->fragmentShadingRateWithFragmentShaderInterlock = false;
   props->fragmentShadingRateWithCustomSampleLocations = true;
   props->fragmentShadingRateStrictMultiplyCombiner = true;

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts =
      pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
   props->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;
   props->supportsNonZeroFirstInstance = true;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_pipeline_robustness */
   props->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
   props->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_KHR_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_map_memory_placed */
   os_get_page_size(&os_page_size);
   props->minPlacedMemoryMapAlignment = os_page_size;

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_nested_command_buffer */
   props->maxCommandBufferNestingLevel = UINT32_MAX;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
      COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
      COND(pdevice->info->a7xx.storage_8bit, 1));
1220    props->robustStorageBufferDescriptorSize =
1221       props->storageBufferDescriptorSize;
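   /* Illustrative arithmetic (added note): with A6XX_TEX_CONST_DWORDS == 16
    * the base descriptor above is 16 * 4 = 64 bytes, and each COND() term
    * contributes one more 64-byte descriptor for an alternate 16-bit or
    * 8-bit view of the buffer, so the reported size works out to 64, 128 or
    * 192 bytes depending on the generation's storage support.
    */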
1222    props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
1223    props->maxSamplerDescriptorBufferRange = ~0ull;
1224    props->maxResourceDescriptorBufferRange = ~0ull;
1225    props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
1226    props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
1227    props->descriptorBufferAddressSpaceSize = ~0ull;
1228    props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
1229 
1230    /* VK_EXT_legacy_vertex_attributes */
1231    props->nativeUnalignedPerformance = true;
1232 
1233    /* VK_EXT_fragment_density_map */
1234    props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
1235    props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
1236    props->fragmentDensityInvocations = false;
1237 
1238    /* VK_KHR_maintenance5 */
1239    props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
1240    props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
1241    props->depthStencilSwizzleOneSupport = true;
1242    props->polygonModePointSize = true;
1243    props->nonStrictWideLinesUseParallelogram = false;
1244    props->nonStrictSinglePixelWideLinesUseParallelogram = false;
1245 
1246    /* VK_KHR_maintenance6 */
1247    props->blockTexelViewCompatibleMultipleLayers = true;
1248    props->maxCombinedImageSamplerDescriptorCount = 1;
1249    props->fragmentShadingRateClampCombinerInputs = true;
1250 
1251    /* VK_EXT_host_image_copy */
1252 
1253    /* We don't use the layouts ATM so just report all layouts from
1254     * extensions that we support as compatible.
1255     */
1256    static const VkImageLayout supported_layouts[] = {
1257       VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
1258       VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
1259       VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
1260       VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
1261       VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
1262       VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
1263       VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
1264       VK_IMAGE_LAYOUT_PREINITIALIZED,
1265       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
1266       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
1267       VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
1268       VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
1269       VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
1270       VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
1271       VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
1272       VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
1273       VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
1274       VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
1275    };
1276 
1277    props->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
1278    props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
1279    props->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
1280    props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);
1281 
1282    /* We're a UMA so we can always map every kind of memory */
1283    props->identicalMemoryTypeRequirements = true;
1284 
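   /* Hash together everything that determines whether optimally-tiled image
    * data is bit-compatible between devices, so that
    * optimalTilingLayoutUUID only matches where host image copies can
    * actually be exchanged.
    */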
1285    {
1286       struct mesa_sha1 sha1_ctx;
1287       uint8_t sha1[20];
1288 
1289       _mesa_sha1_init(&sha1_ctx);
1290 
1291       /* Make sure we don't match with other vendors */
1292       const char *driver = "turnip-v1";
1293       _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));
1294 
1295       /* Hash in UBWC configuration */
1296       _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.highest_bank_bit,
1297                         sizeof(pdevice->ubwc_config.highest_bank_bit));
1298       _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.bank_swizzle_levels,
1299                         sizeof(pdevice->ubwc_config.bank_swizzle_levels));
1300       _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.macrotile_mode,
1301                         sizeof(pdevice->ubwc_config.macrotile_mode));
1302 
1303       _mesa_sha1_final(&sha1_ctx, sha1);
1304 
1305       memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
1306    }
1307 }
1308 
1309 static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
1310    &tu_shader_ops,
1311    &tu_nir_shaders_ops,
1312    NULL,
1313 };
1314 
1315 VkResult
1316 tu_physical_device_init(struct tu_physical_device *device,
1317                         struct tu_instance *instance)
1318 {
1319    VkResult result = VK_SUCCESS;
1320 
1321    const char *fd_name = fd_dev_name(&device->dev_id);
1322    if (!fd_name) {
1323       return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
1324                                "device (chip_id = %" PRIX64
1325                                ", gpu_id = %u) is unsupported",
1326                                device->dev_id.chip_id, device->dev_id.gpu_id);
1327    }
1328 
1329    if (strncmp(fd_name, "FD", 2) == 0) {
1330       device->name = vk_asprintf(&instance->vk.alloc,
1331                                  VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
1332                                  "Turnip Adreno (TM) %s", &fd_name[2]);
1333    } else {
1334       device->name = vk_strdup(&instance->vk.alloc, fd_name,
1335                                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1336 
1337    }
1338    if (!device->name) {
1339       return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1340                                "device name alloc fail");
1341    }
1342 
1343    const struct fd_dev_info info = fd_dev_info(&device->dev_id);
1344    if (!info.chip) {
1345       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1346                                  "device %s is unsupported", device->name);
1347       goto fail_free_name;
1348    }
1349    switch (fd_dev_gen(&device->dev_id)) {
1350    case 6:
1351    case 7: {
1352       device->dev_info = info;
1353       device->info = &device->dev_info;
1354       uint32_t depth_cache_size =
1355          device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
1356       uint32_t color_cache_size =
1357          (device->info->num_ccu *
1358           device->info->a6xx.sysmem_per_ccu_color_cache_size);
1359       uint32_t color_cache_size_gmem =
1360          color_cache_size /
1361          (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);
1362 
1363       device->ccu_depth_offset_bypass = 0;
1364       device->ccu_offset_bypass =
1365          device->ccu_depth_offset_bypass + depth_cache_size;
1366 
1367       if (device->info->a7xx.has_gmem_vpc_attr_buf) {
1368          device->vpc_attr_buf_size_bypass =
1369             device->info->a7xx.sysmem_vpc_attr_buf_size;
1370          device->vpc_attr_buf_offset_bypass =
1371             device->ccu_offset_bypass + color_cache_size;
1372 
1373          device->vpc_attr_buf_size_gmem =
1374             device->info->a7xx.gmem_vpc_attr_buf_size;
1375          device->vpc_attr_buf_offset_gmem =
1376             device->gmem_size -
1377             (device->vpc_attr_buf_size_gmem * device->info->num_ccu);
1378 
1379          device->ccu_offset_gmem =
1380             device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;
1381 
1382          device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
1383       } else {
1384          device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
1385          device->usable_gmem_size_gmem = device->gmem_size;
1386       }
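      /* Purely illustrative sketch of the layouts implied by the offsets
       * above, for an a7xx part with has_gmem_vpc_attr_buf:
       *
       *   sysmem: | depth CCU | color CCU | VPC attr buf |
       *           0           ^ccu_offset_bypass
       *
       *   GMEM:   | ... | color CCU (gmem fraction) | VPC attr buf |
       *                 ^ccu_offset_gmem            ^vpc_attr_buf_offset_gmem
       *
       * Without the VPC attr buf, the color CCU sits at the very top of
       * GMEM instead.
       */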
1387 
1388       if (instance->reserve_descriptor_set) {
1389          device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
1390       } else {
1391          device->usable_sets = device->info->a6xx.max_sets;
1392          device->reserved_set_idx = -1;
1393       }
1394       break;
1395    }
1396    default:
1397       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1398                                  "device %s is unsupported", device->name);
1399       goto fail_free_name;
1400    }
1401    if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
1402       result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
1403                                  "cannot generate UUID");
1404       goto fail_free_name;
1405    }
1406 
1407    device->level1_dcache_size = tu_get_l1_dcache_size();
1408    device->has_cached_non_coherent_memory =
1409       device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;
1410 
1411    device->memory.type_count = 1;
1412    device->memory.types[0] =
1413       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1414       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1415       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
1416 
1417    if (device->has_cached_coherent_memory) {
1418       device->memory.types[device->memory.type_count] =
1419          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1420          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1421          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
1422          VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1423       device->memory.type_count++;
1424    }
1425 
1426    if (device->has_cached_non_coherent_memory) {
1427       device->memory.types[device->memory.type_count] =
1428          VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
1429          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1430          VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
1431       device->memory.type_count++;
1432    }
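   /* For example, a device with cached non-coherent memory but without
    * cached coherent memory ends up advertising two types on the single
    * heap:
    *
    *   type 0: DEVICE_LOCAL | HOST_VISIBLE | HOST_COHERENT
    *   type 1: DEVICE_LOCAL | HOST_VISIBLE | HOST_CACHED
    */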
1433 
1434    /* Provide fallback UBWC config values if the kernel doesn't support
1435     * providing them. This should match what the kernel programs.
1436     */
1437    if (!device->ubwc_config.highest_bank_bit) {
1438       device->ubwc_config.highest_bank_bit = info.highest_bank_bit;
1439    }
1440    if (device->ubwc_config.bank_swizzle_levels == ~0) {
1441       device->ubwc_config.bank_swizzle_levels = info.ubwc_swizzle;
1442    }
1443    if (device->ubwc_config.macrotile_mode == FDL_MACROTILE_INVALID) {
1444       device->ubwc_config.macrotile_mode =
1445          (enum fdl_macrotile_mode) info.macrotile_mode;
1446    }
1447 
1448    fd_get_driver_uuid(device->driver_uuid);
1449    fd_get_device_uuid(device->device_uuid, &device->dev_id);
1450 
1451    struct vk_physical_device_dispatch_table dispatch_table;
1452    vk_physical_device_dispatch_table_from_entrypoints(
1453       &dispatch_table, &tu_physical_device_entrypoints, true);
1454    vk_physical_device_dispatch_table_from_entrypoints(
1455       &dispatch_table, &wsi_physical_device_entrypoints, false);
1456 
1457    result = vk_physical_device_init(&device->vk, &instance->vk,
1458                                     NULL, NULL, NULL, /* We set up extensions later */
1459                                     &dispatch_table);
1460    if (result != VK_SUCCESS)
1461       goto fail_free_name;
1462 
1463    get_device_extensions(device, &device->vk.supported_extensions);
1464    tu_get_features(device, &device->vk.supported_features);
1465    tu_get_properties(device, &device->vk.properties);
1466 
1467    device->vk.supported_sync_types = device->sync_types;
1468 
1469 #ifdef TU_USE_WSI_PLATFORM
1470    result = tu_wsi_init(device);
1471    if (result != VK_SUCCESS) {
1472       vk_startup_errorf(instance, result, "WSI init failure");
1473       vk_physical_device_finish(&device->vk);
1474       goto fail_free_name;
1475    }
1476 #endif
1477 
1478    /* The gpu id is already embedded in the uuid, so the device name plus
1479     * that uuid is sufficient to key the on-disk cache.
1480     */
1481    char buf[VK_UUID_SIZE * 2 + 1];
1482    mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1483    device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1484 
1485    device->vk.pipeline_cache_import_ops = cache_import_ops;
1486 
1487    return VK_SUCCESS;
1488 
1489 fail_free_name:
1490    vk_free(&instance->vk.alloc, (void *)device->name);
1491    return result;
1492 }
1493 
1494 static void
1495 tu_physical_device_finish(struct tu_physical_device *device)
1496 {
1497 #ifdef TU_USE_WSI_PLATFORM
1498    tu_wsi_finish(device);
1499 #endif
1500 
1501    close(device->local_fd);
1502    if (device->master_fd != -1)
1503       close(device->master_fd);
1504 
1505    if (device->kgsl_dma_fd != -1)
1506       close(device->kgsl_dma_fd);
1507 
1508    disk_cache_destroy(device->vk.disk_cache);
1509    vk_free(&device->instance->vk.alloc, (void *)device->name);
1510 
1511    vk_physical_device_finish(&device->vk);
1512 }
1513 
1514 static void
1515 tu_destroy_physical_device(struct vk_physical_device *device)
1516 {
1517    tu_physical_device_finish((struct tu_physical_device *) device);
1518    vk_free(&device->instance->alloc, device);
1519 }
1520 
1521 static const driOptionDescription tu_dri_options[] = {
1522    DRI_CONF_SECTION_PERFORMANCE
1523       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1524       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1525       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1526       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1527       DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1528    DRI_CONF_SECTION_END
1529 
1530    DRI_CONF_SECTION_DEBUG
1531       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1532       DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1533       DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1534       DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1535    DRI_CONF_SECTION_END
1536 
1537    DRI_CONF_SECTION_MISCELLANEOUS
1538       DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1539       DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1540       DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1541       DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
1542    DRI_CONF_SECTION_END
1543 };
1544 
1545 static void
1546 tu_init_dri_options(struct tu_instance *instance)
1547 {
1548    driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1549                       ARRAY_SIZE(tu_dri_options));
1550    driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1551                        instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1552                        instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1553 
1554    instance->dont_care_as_load =
1555          driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1556    instance->conservative_lrz =
1557          !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1558    instance->reserve_descriptor_set =
1559          !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1560    instance->allow_oob_indirect_ubo_loads =
1561          driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1562    instance->disable_d24s8_border_color_workaround =
1563          driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
1564 }
1565 
1566 static uint32_t instance_count = 0;
1567 
1568 VKAPI_ATTR VkResult VKAPI_CALL
1569 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1570                   const VkAllocationCallbacks *pAllocator,
1571                   VkInstance *pInstance)
1572 {
1573    struct tu_instance *instance;
1574    VkResult result;
1575 
1576    tu_env_init();
1577 
1578    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1579 
1580    if (pAllocator == NULL)
1581       pAllocator = vk_default_allocator();
1582 
1583    instance = (struct tu_instance *) vk_zalloc(
1584       pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1585 
1586    if (!instance)
1587       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1588 
1589    struct vk_instance_dispatch_table dispatch_table;
1590    vk_instance_dispatch_table_from_entrypoints(
1591       &dispatch_table, &tu_instance_entrypoints, true);
1592    vk_instance_dispatch_table_from_entrypoints(
1593       &dispatch_table, &wsi_instance_entrypoints, false);
1594 
1595    result = vk_instance_init(&instance->vk,
1596                              &tu_instance_extensions_supported,
1597                              &dispatch_table,
1598                              pCreateInfo, pAllocator);
1599    if (result != VK_SUCCESS) {
1600       vk_free(pAllocator, instance);
1601       return vk_error(NULL, result);
1602    }
1603 
1604    instance->vk.physical_devices.try_create_for_drm =
1605       tu_physical_device_try_create;
1606    instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1607    instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1608 
1609    instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
1610    if (TU_DEBUG(STARTUP))
1611       mesa_logi("Created an instance");
1612 
1613    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1614 
1615    tu_init_dri_options(instance);
1616 
1617    *pInstance = tu_instance_to_handle(instance);
1618 
1619 #ifdef HAVE_PERFETTO
1620    tu_perfetto_init();
1621 #endif
1622 
1623    util_gpuvis_init();
1624 
1625    return VK_SUCCESS;
1626 }
1627 
1628 VKAPI_ATTR void VKAPI_CALL
1629 tu_DestroyInstance(VkInstance _instance,
1630                    const VkAllocationCallbacks *pAllocator)
1631 {
1632    VK_FROM_HANDLE(tu_instance, instance, _instance);
1633 
1634    if (!instance)
1635       return;
1636 
1637    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1638 
1639    driDestroyOptionCache(&instance->dri_options);
1640    driDestroyOptionInfo(&instance->available_dri_options);
1641 
1642    vk_instance_finish(&instance->vk);
1643    vk_free(&instance->vk.alloc, instance);
1644 }
1645 
1646 static const VkQueueFamilyProperties tu_queue_family_properties = {
1647    .queueFlags =
1648       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1649    .queueCount = 1,
1650    .timestampValidBits = 48,
1651    .minImageTransferGranularity = { 1, 1, 1 },
1652 };
1653 
1654 void
1655 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1656                                                   VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1657 {
1658    props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1659    switch (props->priorityCount) {
1660    case 1:
1661       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1662       break;
1663    case 2:
1664       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1665       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1666       break;
1667    case 3:
1668       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1669       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1670       props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1671       break;
1672    default:
1673       unreachable("unexpected priority count");
1674       break;
1675    }
1676 }
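/* E.g. a kernel exposing two submitqueue priority levels yields
 * priorityCount = 2 with { MEDIUM, HIGH }; LOW is only reported once the
 * kernel exposes three (or more, clamped above) levels.
 */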
1677 
1678 VKAPI_ATTR void VKAPI_CALL
1679 tu_GetPhysicalDeviceQueueFamilyProperties2(
1680    VkPhysicalDevice physicalDevice,
1681    uint32_t *pQueueFamilyPropertyCount,
1682    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1683 {
1684    VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1685 
1686    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1687                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
1688 
1689    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1690    {
1691       p->queueFamilyProperties = tu_queue_family_properties;
1692 
1693       vk_foreach_struct(ext, p->pNext) {
1694          switch (ext->sType) {
1695          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1696             VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1697                (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1698             tu_physical_device_get_global_priority_properties(pdevice, props);
1699             break;
1700          }
1701          default:
1702             break;
1703          }
1704       }
1705    }
1706 }
1707 
1708 uint64_t
1709 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1710 {
1711    uint64_t total_ram = 0;
1712    ASSERTED bool has_physical_memory =
1713       os_get_total_physical_memory(&total_ram);
1714    assert(has_physical_memory);
1715 
1716    /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
1717     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
1718     */
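   /* E.g. 4GiB of total RAM yields a 2GiB heap, while 16GiB yields 12GiB,
    * before the VA-size clamp below.
    */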
1719    uint64_t available_ram;
1720    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1721       available_ram = total_ram / 2;
1722    else
1723       available_ram = total_ram * 3 / 4;
1724 
1725    if (physical_device->va_size)
1726       available_ram = MIN2(available_ram, physical_device->va_size);
1727 
1728    return available_ram;
1729 }
1730 
1731 static VkDeviceSize
1732 tu_get_budget_memory(struct tu_physical_device *physical_device)
1733 {
1734    uint64_t heap_size = physical_device->heap.size;
1735    uint64_t heap_used = physical_device->heap.used;
1736    uint64_t sys_available;
1737    ASSERTED bool has_available_memory =
1738       os_get_available_system_memory(&sys_available);
1739    assert(has_available_memory);
1740 
1741    if (physical_device->va_size)
1742       sys_available = MIN2(sys_available, physical_device->va_size);
1743 
1744    /*
1745     * Let's not incite the app to starve the system: report at most 90% of
1746     * available system memory.
1747     */
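   /* E.g. with a 12GiB heap, 1GiB already used and 8GiB of available system
    * memory, the reported budget is MIN2(12GiB, 1GiB + 8GiB * 9 / 10) =
    * 8.2GiB.
    */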
1748    uint64_t heap_available = sys_available * 9 / 10;
1749    return MIN2(heap_size, heap_used + heap_available);
1750 }
1751 
1752 VKAPI_ATTR void VKAPI_CALL
1753 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1754                                       VkPhysicalDeviceMemoryProperties2 *props2)
1755 {
1756    VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1757 
1758    VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1759    props->memoryHeapCount = 1;
1760    props->memoryHeaps[0].size = physical_device->heap.size;
1761    props->memoryHeaps[0].flags = physical_device->heap.flags;
1762 
1763    props->memoryTypeCount = physical_device->memory.type_count;
1764    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1765       props->memoryTypes[i] = (VkMemoryType) {
1766          .propertyFlags = physical_device->memory.types[i],
1767          .heapIndex     = 0,
1768       };
1769    }
1770 
1771    vk_foreach_struct(ext, props2->pNext)
1772    {
1773       switch (ext->sType) {
1774       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1775          VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1776             (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1777          memory_budget_props->heapUsage[0] = physical_device->heap.used;
1778          memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1779 
1780          /* The heapBudget and heapUsage values must be zero for array elements
1781           * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1782           */
1783          for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1784             memory_budget_props->heapBudget[i] = 0u;
1785             memory_budget_props->heapUsage[i] = 0u;
1786          }
1787          break;
1788       }
1789       default:
1790          break;
1791       }
1792    }
1793 }
1794 
1795 VKAPI_ATTR VkResult VKAPI_CALL
1796 tu_GetPhysicalDeviceFragmentShadingRatesKHR(
1797    VkPhysicalDevice physicalDevice,
1798    uint32_t *pFragmentShadingRateCount,
1799    VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
1800 {
1801    VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
1802                           pFragmentShadingRates, pFragmentShadingRateCount);
1803 
1804 #define append_rate(w, h, s)                                                        \
1805    {                                                                                \
1806       VkPhysicalDeviceFragmentShadingRateKHR rate = {                               \
1807          .sType =                                                                   \
1808             VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \
1809          .sampleCounts = s,                                                         \
1810          .fragmentSize = { .width = w, .height = h },                               \
1811       };                                                                            \
1812       vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out,        \
1813                                r) *r = rate;                                        \
1814    }
1815 
1816    append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
1817    append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
1818    append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1819    append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1820    append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
1821    append_rate(1, 1, ~0);
1822 
1823 #undef append_rate
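   /* Note: the rates above are ordered from largest to smallest fragment
    * size, and the mandatory 1x1 rate advertises every sample count.
    */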
1824 
1825    return vk_outarray_status(&out);
1826 }
1827 
1828 uint64_t
1829 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1830 {
1831    /* This is based on the 19.2MHz always-on rbbm timer.
1832     *
1833     * TODO: we should probably query this value from the kernel.
1834     */
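   /* Worked out: 1000000000 / 19200000 == 52 in integer math, so each tick
    * counts as 52ns; the true period is ~52.083ns, i.e. the conversion
    * undershoots by roughly 0.16%.
    */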
1835    return ts * (1000000000 / 19200000);
1836 }
1837 
1838 struct u_trace_context *
1839 tu_device_get_u_trace(struct tu_device *device)
1840 {
1841    return &device->trace_context;
1842 }
1843 
1844 static void*
1845 tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
1846 {
1847    struct tu_device *device =
1848       container_of(utctx, struct tu_device, trace_context);
1849 
1850    struct tu_bo *bo;
1851    tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
1852    tu_bo_map(device, bo, NULL);
1853 
1854    return bo;
1855 }
1856 
1857 static void
1858 tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
1859 {
1860    struct tu_device *device =
1861       container_of(utctx, struct tu_device, trace_context);
1862    struct tu_bo *bo = (struct tu_bo *) timestamps;
1863 
1864    tu_bo_finish(device, bo);
1865 }
1866 
1867 template <chip CHIP>
1868 static void
1869 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1870                    uint64_t offset_B, uint32_t)
1871 {
1872    struct tu_bo *bo = (struct tu_bo *) timestamps;
1873    struct tu_cs *ts_cs = (struct tu_cs *) cs;
1874 
1875    if (CHIP == A6XX) {
1876       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1877       tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1878                            CP_EVENT_WRITE_0_TIMESTAMP);
1879       tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1880       tu_cs_emit(ts_cs, 0x00000000);
1881    } else {
1882       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1883       tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1884                                           .write_src = EV_WRITE_ALWAYSON,
1885                                           .write_dst = EV_DST_RAM,
1886                                           .write_enabled = true)
1887                            .value);
1888       tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
1889    }
1890 }
1891 
1892 static uint64_t
1893 tu_trace_read_ts(struct u_trace_context *utctx,
1894                  void *timestamps, uint64_t offset_B, void *flush_data)
1895 {
1896    struct tu_device *device =
1897       container_of(utctx, struct tu_device, trace_context);
1898    struct tu_bo *bo = (struct tu_bo *) timestamps;
1899    struct tu_u_trace_submission_data *submission_data =
1900       (struct tu_u_trace_submission_data *) flush_data;
1901 
1902    /* Only need to stall on results for the first entry: */
1903    if (offset_B == 0) {
1904       tu_queue_wait_fence(submission_data->queue, submission_data->fence,
1905                           1000000000);
1906    }
1907 
1908    if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
1909       return U_TRACE_NO_TIMESTAMP;
1910    }
1911 
1912    uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);
1913 
1914    /* Don't translate the no-timestamp marker: */
1915    if (*ts == U_TRACE_NO_TIMESTAMP)
1916       return U_TRACE_NO_TIMESTAMP;
1917 
1918    return tu_device_ticks_to_ns(device, *ts);
1919 }
1920 
1921 static void
1922 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1923 {
1924    struct tu_device *device =
1925       container_of(utctx, struct tu_device, trace_context);
1926    struct tu_u_trace_submission_data *submission_data =
1927       (struct tu_u_trace_submission_data *) flush_data;
1928 
1929    tu_u_trace_submission_data_finish(device, submission_data);
1930 }
1931 
1932 void
1933 tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
1934                void *ts_from, uint64_t from_offset_B,
1935                void *ts_to, uint64_t to_offset_B,
1936                uint64_t size_B)
1937 {
1938    struct tu_cs *cs = (struct tu_cs *) cmdstream;
1939    struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1940    struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1941 
1942    tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1943    tu_cs_emit(cs, size_B / sizeof(uint32_t));
1944    tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
1945    tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
1946 }
1947 
1948 static void
1949 tu_trace_capture_data(struct u_trace *ut,
1950                         void *cs,
1951                         void *dst_buffer,
1952                         uint64_t dst_offset_B,
1953                         void *src_buffer,
1954                         uint64_t src_offset_B,
1955                         uint32_t size_B)
1956 {
1957    if (src_buffer)
1958       tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
1959                      dst_offset_B, size_B);
1960 }
1961 
1962 static const void *
1963 tu_trace_get_data(struct u_trace_context *utctx,
1964                   void *buffer,
1965                   uint64_t offset_B,
1966                   uint32_t size_B)
1967 {
1968    struct tu_bo *bo = (struct tu_bo *) buffer;
1969    return (char *) bo->map + offset_B;
1970 }
1971 
1972 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1973  * that ignore tracepoints at the beginning/end that are part of a
1974  * suspend/resume chain.
1975  */
1976 static struct u_trace_iterator
1977 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1978 {
1979    switch (cmdbuf->state.suspend_resume) {
1980    case SR_IN_PRE_CHAIN:
1981       return cmdbuf->trace_renderpass_end;
1982    case SR_AFTER_PRE_CHAIN:
1983    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1984       return cmdbuf->pre_chain.trace_renderpass_end;
1985    default:
1986       return u_trace_begin_iterator(&cmdbuf->trace);
1987    }
1988 }
1989 
1990 static struct u_trace_iterator
1991 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1992 {
1993    switch (cmdbuf->state.suspend_resume) {
1994    case SR_IN_PRE_CHAIN:
1995       return cmdbuf->trace_renderpass_end;
1996    case SR_IN_CHAIN:
1997    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1998       return cmdbuf->trace_renderpass_start;
1999    default:
2000       return u_trace_end_iterator(&cmdbuf->trace);
2001    }
2002 }
2003 VkResult
2004 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
2005                             struct u_trace **trace_copy)
2006 {
2007    *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
2008                                     sizeof(struct tu_cs), 8,
2009                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2010 
2011    if (*cs == NULL) {
2012       return VK_ERROR_OUT_OF_HOST_MEMORY;
2013    }
2014 
2015    tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
2016               list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");
2017 
2018    tu_cs_begin(*cs);
2019 
2020    tu_cs_emit_wfi(*cs);
2021    tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
2022 
2023    *trace_copy = (struct u_trace *) vk_zalloc(
2024       &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
2025       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2026 
2027    if (*trace_copy == NULL) {
2028       return VK_ERROR_OUT_OF_HOST_MEMORY;
2029    }
2030 
2031    u_trace_init(*trace_copy, cmdbuf->trace.utctx);
2032    u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
2033                         tu_cmd_end_iterator(cmdbuf),
2034                         *trace_copy, *cs,
2035                         tu_copy_buffer);
2036 
2037    tu_cs_emit_wfi(*cs);
2038 
2039    tu_cs_end(*cs);
2040 
2041    return VK_SUCCESS;
2042 }
2043 
2044 VkResult
2045 tu_u_trace_submission_data_create(
2046    struct tu_device *device,
2047    struct tu_cmd_buffer **cmd_buffers,
2048    uint32_t cmd_buffer_count,
2049    struct tu_u_trace_submission_data **submission_data)
2050 {
2051    *submission_data = (struct tu_u_trace_submission_data *)
2052       vk_zalloc(&device->vk.alloc,
2053                 sizeof(struct tu_u_trace_submission_data), 8,
2054                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2055 
2056    if (!(*submission_data)) {
2057       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2058    }
2059 
2060    struct tu_u_trace_submission_data *data = *submission_data;
2061 
2062    data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
2063       &device->vk.alloc,
2064       cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
2065       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2066 
2067    if (!data->cmd_trace_data) {
2068       goto fail;
2069    }
2070 
2071    data->cmd_buffer_count = cmd_buffer_count;
2072    data->last_buffer_with_tracepoints = -1;
2073 
2074    for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
2075       struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
2076 
2077       if (!u_trace_has_points(&cmdbuf->trace))
2078          continue;
2079 
2080       data->last_buffer_with_tracepoints = i;
2081 
2082       if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
2083          /* A single command buffer could be submitted several times, but we
2084           * already baked timestamp iova addresses and trace points are
2085           * single-use. Therefore we have to copy trace points and create
2086           * a new timestamp buffer on every submit of a reusable command buffer.
2087           */
2088          if (tu_create_copy_timestamp_cs(cmdbuf,
2089                &data->cmd_trace_data[i].timestamp_copy_cs,
2090                &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
2091             goto fail;
2092          }
2093 
2094          assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
2095       } else {
2096          data->cmd_trace_data[i].trace = &cmdbuf->trace;
2097       }
2098    }
2099 
2100    assert(data->last_buffer_with_tracepoints != -1);
2101 
2102    return VK_SUCCESS;
2103 
2104 fail:
2105    tu_u_trace_submission_data_finish(device, data);
2106    *submission_data = NULL;
2107 
2108    return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
2109 }
2110 
2111 void
2112 tu_u_trace_submission_data_finish(
2113    struct tu_device *device,
2114    struct tu_u_trace_submission_data *submission_data)
2115 {
2116    for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
2117       /* Only free the trace if we had to create a copy of it */
2118       struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
2119       if (cmd_data->timestamp_copy_cs) {
2120          tu_cs_finish(cmd_data->timestamp_copy_cs);
2121          vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
2122 
2123          u_trace_fini(cmd_data->trace);
2124          vk_free(&device->vk.alloc, cmd_data->trace);
2125       }
2126    }
2127 
2128    if (submission_data->kgsl_timestamp_bo.bo) {
2129       mtx_lock(&device->kgsl_profiling_mutex);
2130       tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
2131                         &submission_data->kgsl_timestamp_bo);
2132       mtx_unlock(&device->kgsl_profiling_mutex);
2133    }
2134 
2135    vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
2136    vk_free(&device->vk.alloc, submission_data);
2137 }
2138 
2139 enum tu_reg_stomper_flags
2140 {
2141    TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
2142    TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
2143    TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
2144 };
2145 
2146 /* See freedreno.rst for usage tips */
2147 static const struct debug_named_value tu_reg_stomper_options[] = {
2148    { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
2149      "By default the range specifies the regs to stomp, with 'inverse' it "
2150      "specifies the regs NOT to stomp" },
2151    { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
2152      "Stomp regs at the start of a cmdbuf" },
2153    { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
2154      "Stomp regs before a renderpass" },
2155    { NULL, 0 }
2156 };
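/* Illustrative usage, combining the options above with the env vars parsed
 * below: stomp every stompable reg outside 0x8000..0x8fff before each
 * renderpass:
 *
 *   TU_DEBUG_STALE_REGS_RANGE=0x8000,0x8fff \
 *   TU_DEBUG_STALE_REGS_FLAGS=inverse,renderpass <vulkan app>
 */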
2157 
2158 template <chip CHIP>
2159 static inline void
2160 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
2161                      bool is_rp_blit,
2162                      uint32_t first_reg,
2163                      uint32_t last_reg,
2164                      bool inverse)
2165 {
2166    const uint16_t *regs = NULL;
2167    size_t count = 0;
2168 
2169    if (is_rp_blit) {
2170       regs = &RP_BLIT_REGS<CHIP>[0];
2171       count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
2172    } else {
2173       regs = &CMD_REGS<CHIP>[0];
2174       count = ARRAY_SIZE(CMD_REGS<CHIP>);
2175    }
2176 
2177    for (size_t i = 0; i < count; i++) {
2178       if (inverse) {
2179          if (regs[i] >= first_reg && regs[i] <= last_reg)
2180             continue;
2181       } else {
2182          if (regs[i] < first_reg || regs[i] > last_reg)
2183             continue;
2184       }
2185 
2186       if (fd_reg_stomp_allowed(CHIP, regs[i]))
2187          tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
2188    }
2189 }
2190 
2191 static void
2192 tu_init_dbg_reg_stomper(struct tu_device *device)
2193 {
2194    const char *stale_reg_range_str =
2195       os_get_option("TU_DEBUG_STALE_REGS_RANGE");
2196    if (!stale_reg_range_str)
2197       return;
2198 
2199    uint32_t first_reg, last_reg;
2200 
2201    if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
2202       mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
2203       return;
2204    }
2205 
2206    uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
2207                                                  tu_reg_stomper_options,
2208                                                  TU_DEBUG_REG_STOMP_CMDBUF);
2209 
2210    bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
2211 
2212    if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) {
2213       struct tu_cs *cmdbuf_cs =
2214          (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2215       tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
2216                  "cmdbuf reg stomp cs");
2217       tu_cs_begin(cmdbuf_cs);
2218 
2219       TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
2220       tu_cs_end(cmdbuf_cs);
2221       device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
2222    }
2223 
2224    if (debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) {
2225       struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
2226       tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
2227       tu_cs_begin(rp_cs);
2228 
2229       TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
2230       tu_cs_end(rp_cs);
2231 
2232       device->dbg_renderpass_stomp_cs = rp_cs;
2233    }
2234 }
2235 
2236 /* It is unknown what this workaround is for and what it fixes. */
2237 static VkResult
2238 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
2239 {
2240    struct tu_cs shader_cs;
2241    tu_cs_begin_sub_stream(&device->sub_cs, 10, &shader_cs);
2242 
2243    uint32_t raw_shader[] = {
2244       0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2245       0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2246       0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2247       0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2248       0x00000000, 0x03000000, // end
2249    };
2250 
2251    tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2252    struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(&device->sub_cs, &shader_cs);
2253    uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2254 
2255    struct tu_cs sub_cs;
2256    tu_cs_begin_sub_stream(&device->sub_cs, 47, &sub_cs);
2257 
2258    tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2259             .vs_state = true, .hs_state = true, .ds_state = true,
2260             .gs_state = true, .fs_state = true, .gfx_ibo = true,
2261             .cs_bindless = 0xff, .gfx_bindless = 0xff));
2262    tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2263             .constlen = 4,
2264             .enabled = true));
2265    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2266    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2267             .threadmode = MULTI,
2268             .threadsize = THREAD128,
2269             .mergedregs = true));
2270    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2271    tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2272                      HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2273                      HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2274    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2275    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2276    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2277    tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
2278             .linearlocalidregid = regid(63, 0),
2279             .threadsize = THREAD128,
2280             .workgrouprastorderzfirsten = true,
2281             .wgtilewidth = 4,
2282             .wgtileheight = 17));
2283    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2284             .wgidconstid = regid(51, 3),
2285             .wgsizeconstid = regid(48, 0),
2286             .wgoffsetconstid = regid(63, 0),
2287             .localidregid = regid(63, 0)));
2288    tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2289             .linearlocalidregid = regid(63, 0),
2290             .threadsize = THREAD128,
2291             .workitemrastorder = WORKITEMRASTORDER_TILED));
2292    tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2293 
2294    tu_cs_emit_regs(&sub_cs,
2295                   HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2296                                           .localsizex = 255,
2297                                           .localsizey = 1,
2298                                           .localsizez = 1),
2299                   HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2300                   HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2301                   HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2302                   HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2303                   HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2304                   HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2305    tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2306             .localsizex = 255,
2307             .localsizey = 0,
2308             .localsizez = 0));
2309    tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2310    tu_cs_emit(&sub_cs, 0);
2311    tu_cs_emit_qw(&sub_cs, shader_iova);
2312 
2313    tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2314    tu_cs_emit(&sub_cs, 0x00000000);
2315    tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2316    tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2317    tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2318 
2319    device->cmdbuf_start_a725_quirk_entry =
2320       tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2321 
2322    return VK_SUCCESS;
2323 }
2324 
2325 static VkResult
2326 tu_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2327 {
2328    struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
2329    const int ret = tu_device_get_gpu_timestamp(dev, timestamp);
2330    return ret == 0 ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2331 }
2332 
2333 VKAPI_ATTR VkResult VKAPI_CALL
2334 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2335                 const VkDeviceCreateInfo *pCreateInfo,
2336                 const VkAllocationCallbacks *pAllocator,
2337                 VkDevice *pDevice)
2338 {
2339    VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2340    VkResult result;
2341    struct tu_device *device;
2342    bool border_color_without_format = false;
2343 
2344    vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2345       switch (ext->sType) {
2346       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2347          border_color_without_format =
2348             ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2349                ->customBorderColorWithoutFormat;
2350          break;
2351       default:
2352          break;
2353       }
2354    }
2355 
2356    device = (struct tu_device *) vk_zalloc2(
2357       &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2358       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2359    if (!device)
2360       return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2361 
2362    struct vk_device_dispatch_table dispatch_table;
2363    bool override_initial_entrypoints = true;
2364 
2365    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2366       vk_device_dispatch_table_from_entrypoints(
2367          &dispatch_table, &tu_rmv_device_entrypoints, true);
2368       override_initial_entrypoints = false;
2369    }
2370 
2371    vk_device_dispatch_table_from_entrypoints(
2372       &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2373 
2374    switch (fd_dev_gen(&physical_device->dev_id)) {
2375    case 6:
2376       vk_device_dispatch_table_from_entrypoints(
2377          &dispatch_table, &tu_device_entrypoints_a6xx, false);
2378       break;
2379    case 7:
2380       vk_device_dispatch_table_from_entrypoints(
2381          &dispatch_table, &tu_device_entrypoints_a7xx, false);
2382    }
2383 
2384    vk_device_dispatch_table_from_entrypoints(
2385       &dispatch_table, &wsi_device_entrypoints, false);
2386 
2387    const struct vk_device_entrypoint_table *knl_device_entrypoints =
2388          physical_device->instance->knl->device_entrypoints;
2389    if (knl_device_entrypoints) {
2390       vk_device_dispatch_table_from_entrypoints(
2391          &dispatch_table, knl_device_entrypoints, false);
2392    }
2393 
2394    result = vk_device_init(&device->vk, &physical_device->vk,
2395                            &dispatch_table, pCreateInfo, pAllocator);
2396    if (result != VK_SUCCESS) {
2397       vk_free(&device->vk.alloc, device);
2398       return vk_startup_errorf(physical_device->instance, result,
2399                                "vk_device_init failed");
2400    }
2401 
2402    device->instance = physical_device->instance;
2403    device->physical_device = physical_device;
2404    device->device_idx = device->physical_device->device_count++;
2405 
2406    result = tu_drm_device_init(device);
2407    if (result != VK_SUCCESS) {
2408       vk_free(&device->vk.alloc, device);
2409       return result;
2410    }
2411 
2412    device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2413    device->vk.check_status = tu_device_check_status;
2414    device->vk.get_timestamp = tu_device_get_timestamp;
2415 
2416    mtx_init(&device->bo_mutex, mtx_plain);
2417    mtx_init(&device->pipeline_mutex, mtx_plain);
2418    mtx_init(&device->autotune_mutex, mtx_plain);
2419    mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2420    u_rwlock_init(&device->dma_bo_lock);
2421    pthread_mutex_init(&device->submit_mutex, NULL);
2422 
2423    if (physical_device->has_set_iova) {
2424       mtx_init(&device->vma_mutex, mtx_plain);
2425       util_vma_heap_init(&device->vma, physical_device->va_start,
2426                          ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2427    }
2428 
2429    if (TU_DEBUG(BOS))
2430       device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2431 
2432    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2433       tu_memory_trace_init(device);
2434 
2435    /* kgsl is not a drm device: */
2436    if (!is_kgsl(physical_device->instance))
2437       vk_device_set_drm_fd(&device->vk, device->fd);
2438 
2439    struct tu6_global *global = NULL;
2440    uint32_t global_size = sizeof(struct tu6_global);
2441    struct vk_pipeline_cache_create_info pcc_info = { };
2442 
2443    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2444       const VkDeviceQueueCreateInfo *queue_create =
2445          &pCreateInfo->pQueueCreateInfos[i];
2446       uint32_t qfi = queue_create->queueFamilyIndex;
2447       device->queues[qfi] = (struct tu_queue *) vk_alloc(
2448          &device->vk.alloc,
2449          queue_create->queueCount * sizeof(struct tu_queue), 8,
2450          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2451       if (!device->queues[qfi]) {
2452          result = vk_startup_errorf(physical_device->instance,
2453                                     VK_ERROR_OUT_OF_HOST_MEMORY,
2454                                     "OOM");
2455          goto fail_queues;
2456       }
2457 
2458       memset(device->queues[qfi], 0,
2459              queue_create->queueCount * sizeof(struct tu_queue));
2460 
2461       device->queue_count[qfi] = queue_create->queueCount;
2462 
2463       for (unsigned q = 0; q < queue_create->queueCount; q++) {
2464          result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create);
2465          if (result != VK_SUCCESS) {
2466             device->queue_count[qfi] = q;
2467             goto fail_queues;
2468          }
2469       }
2470    }
2471 
2472    {
2473       struct ir3_compiler_options ir3_options = {
2474          .push_ubo_with_preamble = true,
2475          .disable_cache = true,
2476          .bindless_fb_read_descriptor = -1,
2477          .bindless_fb_read_slot = -1,
2478          .storage_16bit = physical_device->info->a6xx.storage_16bit,
2479          .storage_8bit = physical_device->info->a7xx.storage_8bit,
2480          .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2481       };
2482       device->compiler = ir3_compiler_create(
2483          NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2484    }
2485    if (!device->compiler) {
2486       result = vk_startup_errorf(physical_device->instance,
2487                                  VK_ERROR_INITIALIZATION_FAILED,
2488                                  "failed to initialize ir3 compiler");
2489       goto fail_queues;
2490    }
2491 
2492    /* Initialize sparse array for refcounting imported BOs */
2493    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2494 
2495    if (physical_device->has_set_iova) {
2496       STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2497       if (!u_vector_init(&device->zombie_vmas, 64,
2498                          sizeof(struct tu_zombie_vma))) {
2499          result = vk_startup_errorf(physical_device->instance,
2500                                     VK_ERROR_INITIALIZATION_FAILED,
2501                                     "zombie_vmas create failed");
2502          goto fail_free_zombie_vma;
2503       }
2504    }
2505 
2506    /* initial sizes, these will increase if there is overflow */
2507    device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2508    device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2509 
2510    if (device->vk.enabled_features.customBorderColors)
2511       global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2512 
   tu_bo_suballocator_init(
      &device->pipeline_suballoc, device, 128 * 1024,
      (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
                                TU_BO_ALLOC_ALLOW_DUMP |
                                TU_BO_ALLOC_INTERNAL_RESOURCE),
      "pipeline_suballoc");
   tu_bo_suballocator_init(&device->autotune_suballoc, device,
                           128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
                           "autotune_suballoc");
   if (is_kgsl(physical_device->instance)) {
      tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
                              128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
                              "kgsl_profiling_suballoc");
   }

   result = tu_bo_init_new(
      device, NULL, &device->global_bo, global_size,
      (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
                                TU_BO_ALLOC_INTERNAL_RESOURCE),
      "global");
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO init");
      goto fail_global_bo;
   }

   result = tu_bo_map(device, device->global_bo, NULL);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "BO map");
      goto fail_global_bo_map;
   }

   global = (struct tu6_global *)device->global_bo->map;
   device->global_bo_map = global;
   tu_init_clear_blit_shaders(device);

   result = tu_init_empty_shaders(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "empty shaders");
      goto fail_empty_shaders;
   }

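   /* Seed the device-global buffer: predication and query state, GMEM
    * load/store debug counters, and the packed built-in border colors that
    * samplers reference by index.
    */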
   global->predicate = 0;
   global->vtx_stats_query_not_running = 1;
   global->dbg_one = (uint32_t)-1;
   global->dbg_gmem_total_loads = 0;
   global->dbg_gmem_taken_loads = 0;
   global->dbg_gmem_total_stores = 0;
   global->dbg_gmem_taken_stores = 0;
   for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
      VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
      tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
                            vk_border_color_is_int((VkBorderColor) i));
   }

   /* initialize to ones so ffs can be used to find unused slots */
   BITSET_ONES(device->custom_border_color);

   result = tu_init_dynamic_rendering(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(device->instance, result, "dynamic rendering");
      goto fail_dynamic_rendering;
   }

   device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
                                                NULL);
   if (!device->mem_cache) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      vk_startup_errorf(device->instance, result, "create pipeline cache failed");
      goto fail_pipeline_cache;
   }

   tu_cs_init(&device->sub_cs, device, TU_CS_MODE_SUB_STREAM, 1024, "device sub cs");

   if (device->vk.enabled_features.performanceCounterQueryPools) {
      /* Prepare 32 command streams, one per pass index, each writing its
       * pass bit to the PERF_CNTRS_REG scratch register. The matching one
       * is picked up at command submit time when a perf query executes.
       */

      device->perfcntrs_pass_cs_entries =
         (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
      if (!device->perfcntrs_pass_cs_entries) {
         result = vk_startup_errorf(device->instance,
               VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
         goto fail_perfcntrs_pass_entries_alloc;
      }

      for (unsigned i = 0; i < 32; i++) {
         struct tu_cs sub_cs;

         result = tu_cs_begin_sub_stream(&device->sub_cs, 3, &sub_cs);
         if (result != VK_SUCCESS) {
            vk_startup_errorf(device->instance, result,
                  "failed to allocate command streams");
            goto fail_prepare_perfcntrs_pass_cs;
         }

         tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
         tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);

         device->perfcntrs_pass_cs_entries[i] =
            tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
      }
   }

   result = tu_init_bin_preamble(device);
   if (result != VK_SUCCESS)
      goto fail_bin_preamble;

   if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
      result = tu_init_cmdbuf_start_a725_quirk(device);
      if (result != VK_SUCCESS)
         goto fail_a725_workaround;
   }

   tu_init_dbg_reg_stomper(device);

   /* Initialize a condition variable for timeline semaphores. It uses
    * CLOCK_MONOTONIC so that timed waits are not affected by wall-clock
    * adjustments.
    */
   pthread_condattr_t condattr;
   if (pthread_condattr_init(&condattr) != 0) {
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr init");
      goto fail_timeline_cond;
   }
   if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread condattr clock setup");
      goto fail_timeline_cond;
   }
   if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
      pthread_condattr_destroy(&condattr);
      result = vk_startup_errorf(physical_device->instance,
                                 VK_ERROR_INITIALIZATION_FAILED,
                                 "pthread cond init");
      goto fail_timeline_cond;
   }
   pthread_condattr_destroy(&condattr);

   result = tu_autotune_init(&device->autotune, device);
   if (result != VK_SUCCESS) {
      goto fail_timeline_cond;
   }

   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
      mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);

   mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
   mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);

   mtx_init(&device->mutex, mtx_plain);

   device->use_z24uint_s8uint =
      physical_device->info->a6xx.has_z24uint_s8uint &&
      (!border_color_without_format ||
       physical_device->instance->disable_d24s8_border_color_workaround);
   device->use_lrz = !TU_DEBUG(NOLRZ);

   tu_gpu_tracepoint_config_variable();

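   /* Hook up u_trace, which records GPU timestamps for tracepoints into
    * driver-owned buffers via the callbacks below.
    */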
   device->submit_count = 0;
   u_trace_context_init(&device->trace_context, device,
                     sizeof(uint64_t),
                     12,
                     tu_trace_create_buffer,
                     tu_trace_destroy_buffer,
                     TU_CALLX(device, tu_trace_record_ts),
                     tu_trace_read_ts,
                     tu_trace_capture_data,
                     tu_trace_get_data,
                     tu_trace_delete_flush_data);

   tu_breadcrumbs_init(device);

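   /* Derive a per-instance/per-device output name for .rd command-stream
    * dumps from the application and engine names.
    */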
   if (FD_RD_DUMP(ENABLE)) {
      struct vk_app_info *app_info = &device->instance->vk.app_info;
      const char *app_name_str = app_info->app_name ?
         app_info->app_name : util_get_process_name();
      const char *engine_name_str = app_info->engine_name ?
         app_info->engine_name : "unknown-engine";

      char app_name[64];
      snprintf(app_name, sizeof(app_name), "%s", app_name_str);

      char engine_name[32];
      snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);

      char output_name[128];
      snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
               app_name, engine_name, device->instance->instance_idx,
               device->device_idx);

      fd_rd_output_init(&device->rd_output, output_name);
   }

   *pDevice = tu_device_to_handle(device);
   return VK_SUCCESS;

fail_timeline_cond:
fail_a725_workaround:
fail_bin_preamble:
fail_prepare_perfcntrs_pass_cs:
   free(device->perfcntrs_pass_cs_entries);
fail_perfcntrs_pass_entries_alloc:
   tu_cs_finish(&device->sub_cs);
   vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
fail_pipeline_cache:
   tu_destroy_dynamic_rendering(device);
fail_dynamic_rendering:
   tu_destroy_empty_shaders(device);
fail_empty_shaders:
   tu_destroy_clear_blit_shaders(device);
fail_global_bo_map:
   TU_RMV(resource_destroy, device, device->global_bo);
   tu_bo_finish(device, device->global_bo);
   vk_free(&device->vk.alloc, device->submit_bo_list);
   util_dynarray_fini(&device->dump_bo_list);
fail_global_bo:
   ir3_compiler_destroy(device->compiler);
   util_sparse_array_finish(&device->bo_map);
   if (physical_device->has_set_iova)
      util_vma_heap_finish(&device->vma);
fail_free_zombie_vma:
   u_vector_finish(&device->zombie_vmas);
fail_queues:
   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queues[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   u_rwlock_destroy(&device->dma_bo_lock);
   tu_drm_device_finish(device);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
   return result;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   if (!device)
      return;

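   /* Tear down device state roughly in the reverse order of device
    * creation.
    */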
   tu_memory_trace_finish(device);

   if (FD_RD_DUMP(ENABLE))
      fd_rd_output_fini(&device->rd_output);

   tu_breadcrumbs_finish(device);

   u_trace_context_fini(&device->trace_context);

   for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
      if (device->scratch_bos[i].initialized)
         tu_bo_finish(device, device->scratch_bos[i].bo);
   }

   if (device->fiber_pvtmem_bo.bo)
      tu_bo_finish(device, device->fiber_pvtmem_bo.bo);

   if (device->wave_pvtmem_bo.bo)
      tu_bo_finish(device, device->wave_pvtmem_bo.bo);

   tu_destroy_clear_blit_shaders(device);

   tu_destroy_empty_shaders(device);

   tu_destroy_dynamic_rendering(device);

   ir3_compiler_destroy(device->compiler);

   vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);

   tu_cs_finish(&device->sub_cs);

   if (device->perfcntrs_pass_cs_entries) {
      free(device->perfcntrs_pass_cs_entries);
   }

   if (device->dbg_cmdbuf_stomp_cs) {
      tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
      free(device->dbg_cmdbuf_stomp_cs);
   }

   if (device->dbg_renderpass_stomp_cs) {
      tu_cs_finish(device->dbg_renderpass_stomp_cs);
      free(device->dbg_renderpass_stomp_cs);
   }

   tu_autotune_fini(&device->autotune, device);

   tu_bo_suballocator_finish(&device->pipeline_suballoc);
   tu_bo_suballocator_finish(&device->autotune_suballoc);
   tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);

   tu_bo_finish(device, device->global_bo);

   for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         tu_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   tu_drm_device_finish(device);

   if (device->physical_device->has_set_iova)
      util_vma_heap_finish(&device->vma);

   util_sparse_array_finish(&device->bo_map);
   u_rwlock_destroy(&device->dma_bo_lock);

   u_vector_finish(&device->zombie_vmas);

   pthread_cond_destroy(&device->timeline_cond);
   _mesa_hash_table_destroy(device->bo_sizes, NULL);
   vk_free(&device->vk.alloc, device->submit_bo_list);
   util_dynarray_fini(&device->dump_bo_list);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
   assert(index < ARRAY_SIZE(dev->scratch_bos));

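   /* Any already-initialized BO of this size class or larger can satisfy the
    * request, since callers only need at least `size` bytes.
    */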
   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO. We take the per-size lock so that
    * concurrent requests for the same size wait for a single allocation
    * instead of racing, while requests for other sizes can proceed.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
                                    TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
   if (result != VK_SUCCESS) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   *pPropertyCount = 0;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                        uint32_t *pPropertyCount,
                                        VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &tu_instance_extensions_supported, pPropertyCount, pProperties);
}

VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   VK_FROM_HANDLE(tu_instance, instance, _instance);
   return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
                                    &tu_instance_entrypoints,
                                    pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return tu_GetInstanceProcAddr(instance, pName);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   struct tu_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

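   /* Fail fast if the heap is already over budget; exact accounting against
    * the new BO's size happens after the allocation below.
    */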
   struct tu_memory_heap *mem_heap = &device->physical_device->heap;
   uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
   if (mem_heap_used > mem_heap->size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   mem = (struct tu_device_memory *) vk_device_memory_create(
      &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
      vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);

   if (fd_info && fd_info->handleType) {
      assert(fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      /*
       * TODO Importing the same fd twice gives us the same handle without
       * reference counting.  We need to maintain a per-instance handle-to-bo
       * table and add reference count to tu_bo.
       */
      result = tu_bo_init_dmabuf(device, &mem->bo,
                                 pAllocateInfo->allocationSize, fd_info->fd);
      if (result == VK_SUCCESS) {
         /* take ownership and close the fd */
         close(fd_info->fd);
      }
   } else if (mem->vk.ahardware_buffer) {
#if DETECT_OS_ANDROID
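      /* Import the AHardwareBuffer's backing dma-buf; the allocation size is
       * discovered by seeking to the end of the fd.
       */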
      const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
      assert(handle->numFds > 0);
      size_t size = lseek(handle->data[0], 0, SEEK_END);
      result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
#else
      result = VK_ERROR_FEATURE_NOT_PRESENT;
#endif
   } else {
      uint64_t client_address = 0;
      BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;

      const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
         vk_find_struct_const(pAllocateInfo->pNext,
                              MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
      if (replay_info && replay_info->opaqueCaptureAddress) {
         client_address = replay_info->opaqueCaptureAddress;
         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
      }

      const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
         pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
      if (flags_info &&
          (flags_info->flags &
           VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
      }

      const VkExportMemoryAllocateInfo *export_info =
         vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
      if (export_info && (export_info->handleTypes &
                          (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
                           VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
         alloc_flags |= TU_BO_ALLOC_SHAREABLE;

      char name[64] = "vkAllocateMemory()";
      if (device->bo_sizes)
         snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
                  (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
      VkMemoryPropertyFlags mem_property =
         device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
      result = tu_bo_init_new_explicit_iova(
         device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
         client_address, mem_property, alloc_flags, name);
   }

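   /* Charge the new BO against the heap budget; if that pushes usage past
    * the heap size, undo the charge and free the BO.
    */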
   if (result == VK_SUCCESS) {
      mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
      if (mem_heap_used > mem_heap->size) {
         p_atomic_add(&mem_heap->used, -mem->bo->size);
         tu_bo_finish(device, mem->bo);
         result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Out of heap memory");
      }
   }

   if (result != VK_SUCCESS) {
      vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
      return result;
   }

   /* Track in the device whether our BO list contains any implicit-sync BOs, so
    * we can suppress implicit sync on non-WSI usage.
    */
   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
   if (wsi_info && wsi_info->implicit_sync) {
      mtx_lock(&device->bo_mutex);
      if (!mem->bo->implicit_sync) {
         mem->bo->implicit_sync = true;
         device->implicit_sync_bo_count++;
      }
      mtx_unlock(&device->bo_mutex);
   }

   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   if (dedicate_info) {
      mem->image = tu_image_from_handle(dedicate_info->image);
   } else {
      mem->image = NULL;
   }

   TU_RMV(heap_create, device, pAllocateInfo, mem);

   *pMem = tu_device_memory_to_handle(mem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,
              VkDeviceMemory _mem,
              const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   VK_FROM_HANDLE(tu_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   TU_RMV(resource_destroy, device, mem);

   p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
   tu_bo_finish(device, mem->bo);
   vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
   VkResult result;

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

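   /* VK_EXT_map_memory_placed: the application supplies the host address at
    * which the memory must be mapped.
    */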
   void *placed_addr = NULL;
   if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
      const VkMemoryMapPlacedInfoEXT *placed_info =
         vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
      assert(placed_info != NULL);
      placed_addr = placed_info->pPlacedAddress;
   }

   result = tu_bo_map(device, mem->bo, placed_addr);
   if (result != VK_SUCCESS)
      return result;

   *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);

   if (mem == NULL)
      return VK_SUCCESS;

   return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
}

static VkResult
sync_cache(VkDevice _device,
           enum tu_mem_sync_op op,
           uint32_t count,
           const VkMappedMemoryRange *ranges)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   if (!device->physical_device->has_cached_non_coherent_memory) {
      tu_finishme(
         "data cache clean and invalidation are unsupported on this arch!");
      return VK_SUCCESS;
   }

   for (uint32_t i = 0; i < count; i++) {
      VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
      tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
   }

   return VK_SUCCESS;
}

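/* Flushing cleans dirty CPU cache lines so subsequent GPU reads see host
 * writes; invalidating discards stale CPU cache lines so subsequent host
 * reads see GPU writes.
 */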
VkResult
tu_FlushMappedMemoryRanges(VkDevice _device,
                           uint32_t memoryRangeCount,
                           const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
                     pMemoryRanges);
}

VkResult
tu_InvalidateMappedMemoryRanges(VkDevice _device,
                                uint32_t memoryRangeCount,
                                const VkMappedMemoryRange *pMemoryRanges)
{
   return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
                     pMemoryRanges);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,
                             VkDeviceMemory memory,
                             VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,
                     const VkFramebufferCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkFramebuffer *pFramebuffer)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC))
      return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
                                         pFramebuffer);

   VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
   struct tu_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;

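   /* Imageless framebuffers carry no attachment views, so only allocate
    * space for them when attachments are provided up front.
    */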
   size_t size = sizeof(*framebuffer);
   if (!imageless)
      size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
   framebuffer = (struct tu_framebuffer *) vk_object_alloc(
      &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!imageless) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct tu_image_view *iview = tu_image_view_from_handle(_iview);
         framebuffer->attachments[i].attachment = iview;
      }
   }

   tu_framebuffer_tiling_config(framebuffer, device, pass);

   *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *pRenderingInfo)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;

   framebuffer->attachment_count = pass->attachment_count;
   framebuffer->width = pRenderingInfo->renderArea.offset.x +
      pRenderingInfo->renderArea.extent.width;
   framebuffer->height = pRenderingInfo->renderArea.offset.y +
      pRenderingInfo->renderArea.extent.height;
   framebuffer->layers = pRenderingInfo->layerCount;

   tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,
                      VkFramebuffer _fb,
                      const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC)) {
      vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
      return;
   }

   VK_FROM_HANDLE(tu_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,
                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
                  int *pFd)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
   if (prime_fd < 0)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   *pFd = prime_fd;

   if (memory->image) {
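      /* Record the image's DRM format modifier (UBWC compression or QCOM
       * tiling) in the BO metadata so the importing side can interpret the
       * layout.
       */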
      struct fdl_layout *l = &memory->image->layout[0];
      uint64_t modifier;
      if (l->ubwc) {
         modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
      } else if (l->tile_mode == 2) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED2;
      } else if (l->tile_mode == 3) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED3;
      } else {
         assert(!l->tile_mode);
         modifier = DRM_FORMAT_MOD_LINEAR;
      }
      struct fdl_metadata metadata = {
         .modifier = modifier,
      };
      tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                            VkExternalMemoryHandleTypeFlagBits handleType,
                            int fd,
                            VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   VK_FROM_HANDLE(tu_device, device, _device);
   assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
   pMemoryFdProperties->memoryTypeBits =
      (1 << device->physical_device->memory.type_count) - 1;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice                            physicalDevice,
   VkSampleCountFlagBits                       samples,
   VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
   else
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
}

uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
   return mem->bo->iova;
}

struct tu_debug_bos_entry {
   uint32_t count;
   uint64_t size;
   const char *name;
};

const char *
tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
{
   assert(name);

   if (likely(!dev->bo_sizes))
      return NULL;

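   /* Aggregate BO statistics per allocation-site name. The returned pointer
    * doubles as the BO's name, which is how tu_debug_bos_del finds this
    * entry again.
    */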
   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
   struct tu_debug_bos_entry *debug_bos;

   if (!entry) {
      debug_bos = (struct tu_debug_bos_entry *) calloc(
         1, sizeof(struct tu_debug_bos_entry));
      debug_bos->name = strdup(name);
      _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
   } else {
      debug_bos = (struct tu_debug_bos_entry *) entry->data;
   }

   debug_bos->count++;
   debug_bos->size += align(size, 4096);
   mtx_unlock(&dev->bo_mutex);

   return debug_bos->name;
}

void
tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
{
   if (likely(!dev->bo_sizes) || !bo->name)
      return;

   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(dev->bo_sizes, bo->name);
   /* If we're finishing the BO, it should have been added already */
   assert(entry);

   struct tu_debug_bos_entry *debug_bos =
      (struct tu_debug_bos_entry *) entry->data;
   debug_bos->count--;
   debug_bos->size -= align(bo->size, 4096);
   if (!debug_bos->count) {
      _mesa_hash_table_remove(dev->bo_sizes, entry);
      free((void *) debug_bos->name);
      free(debug_bos);
   }
   mtx_unlock(&dev->bo_mutex);
}

static int debug_bos_count_compare(const void *in_a, const void *in_b)
{
   struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
   struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
   return a->count - b->count;
}

void
tu_debug_bos_print_stats(struct tu_device *dev)
{
   if (likely(!dev->bo_sizes))
      return;

   mtx_lock(&dev->bo_mutex);

   /* Put the HT's sizes data in an array so we can sort by number of allocations. */
   struct util_dynarray dyn;
   util_dynarray_init(&dyn, NULL);

   uint32_t size = 0;
   uint32_t count = 0;
   hash_table_foreach(dev->bo_sizes, entry)
   {
      struct tu_debug_bos_entry *debug_bos =
         (struct tu_debug_bos_entry *) entry->data;
      util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
      size += debug_bos->size / 1024;
      count += debug_bos->count;
   }

   qsort(dyn.data,
         util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
         sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);

   util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
   {
      struct tu_debug_bos_entry *debug_bos = *entryp;
      mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
                (long long) (debug_bos->size / 1024));
   }

   mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));

   util_dynarray_fini(&dyn);

   mtx_unlock(&dev->bo_mutex);
}

void
tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo)
{
   bo->dump_bo_list_idx = ~0;

   if (!FD_RD_DUMP(ENABLE))
      return;

   mtx_lock(&dev->bo_mutex);
   uint32_t idx =
      util_dynarray_num_elements(&dev->dump_bo_list, struct tu_bo *);
   bo->dump_bo_list_idx = idx;
   util_dynarray_append(&dev->dump_bo_list, struct tu_bo *, bo);
   mtx_unlock(&dev->bo_mutex);
}

void
tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo)
{
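   /* Swap-remove: pop the last entry and move it into the vacated slot so
    * the dump list stays dense and the moved BO's index stays valid.
    */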
   if (bo->dump_bo_list_idx != ~0) {
      mtx_lock(&dev->bo_mutex);
      struct tu_bo *exchanging_bo =
         util_dynarray_pop(&dev->dump_bo_list, struct tu_bo *);
      *util_dynarray_element(&dev->dump_bo_list, struct tu_bo *,
                             bo->dump_bo_list_idx) = exchanging_bo;
      exchanging_bo->dump_bo_list_idx = bo->dump_bo_list_idx;
      mtx_unlock(&dev->bo_mutex);
   }
}

void
tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
                              const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);

   /* Note that the spec says:
    *
    * "An application may open a debug label region in one command buffer and
    *  close it in another, or otherwise split debug label regions across
    *  multiple command buffers or multiple queue submissions. When viewed
    *  from the linear series of submissions to a single queue, the calls to
    *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
    *  matched and balanced."
    *
    * But if you're beginning labeling during a renderpass and ending outside
    * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
    * expect that u_trace and perfetto will do something like take just one of
    * the begins/ends, or drop the event entirely, but not crash.  Similarly,
    * I think we'll have problems if the tracepoints are split across cmd
    * buffers. Still, getting the simple case of cmd buffer annotation into
    * perfetto should prove useful.
    */
   const char *label = pLabelInfo->pLabelName;
   if (cmd_buffer->state.pass) {
      trace_start_cmd_buffer_annotation_rp(
         &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
   } else {
      trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
                                        strlen(label), label);
   }
}

void
tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   if (cmd_buffer->vk.labels.size > 0) {
      if (cmd_buffer->state.pass) {
         trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
                                            &cmd_buffer->draw_cs);
      } else {
         trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
      }
   }

   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
}