/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>
#include <sys/sysinfo.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_query.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
     defined(VK_USE_PLATFORM_XCB_KHR) || \
     defined(VK_USE_PLATFORM_XLIB_KHR) || \
     defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define TU_HAS_SURFACE 1
#else
#define TU_HAS_SURFACE 0
#endif
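/* TU_HAS_SURFACE is set when at least one windowing-system platform is
 * compiled in; it gates the WSI-related instance and device extensions
 * advertised below.
 */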

static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
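   /* The resulting UUID keys the on-disk shader cache on the driver build
    * (via the function identifier below), the GPU family, and the debug
    * flags that change generated code.
    */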
   uint64_t driver_flags = tu_env.debug & TU_DEBUG_NOMULTIPOS;
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

#define TU_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
    *pApiVersion = TU_API_VERSION;
    return VK_SUCCESS;
}

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation           = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                         = true,
#endif
   .KHR_external_fence_capabilities     = true,
   .KHR_external_memory_capabilities    = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2         = true,
#endif
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2       = TU_HAS_SURFACE,
   .KHR_surface                         = TU_HAS_SURFACE,
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                 = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                     = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                    = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display             = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display            = true,
#endif
   .EXT_debug_report                    = true,
   .EXT_debug_utils                     = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display             = true,
   .EXT_display_surface_counter         = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                = true,
#endif
   .EXT_swapchain_colorspace            = TU_HAS_SURFACE,
} };

static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
      .KHR_incremental_present = TU_HAS_SURFACE,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
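      /* Multiview needs hardware support; TU_DEBUG(NOCONFORM) force-enables
       * it for bring-up, at the cost of conformance.
       */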
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,

      /* Hide these behind dri configs for now since we cannot implement them
       * reliably on all surfaces yet. There is no surface capability query
       * for present wait/id, but the feature is useful enough to hide behind
       * an opt-in mechanism for now. If the instance only enables surface
       * extensions that unconditionally support present wait, we can also
       * expose the extension that way. */
      .KHR_present_id =
         TU_HAS_SURFACE && (driQueryOptionb(&device->instance->dri_options,
                                            "vk_khr_present_wait") ||
                            wsi_common_vk_instance_supports_present_wait(
                               &device->instance->vk)),
      .KHR_present_wait =
         TU_HAS_SURFACE && (driQueryOptionb(&device->instance->dri_options,
                                            "vk_khr_present_wait") ||
                            wsi_common_vk_instance_supports_present_wait(
                               &device->instance->vk)),

      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_swapchain = TU_HAS_SURFACE,
      .KHR_swapchain_mutable_format = TU_HAS_SURFACE,
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = !device->info->a7xx.load_shader_consts_via_preamble,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
#if DETECT_OS_ANDROID
      .ANDROID_native_buffer = true,
#endif
      .ARM_rasterization_order_attachment_access = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .VALVE_mutable_descriptor_type = true,
   } };
}

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
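   /* Start with every feature disabled, then opt in individually below. */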
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = false;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = false;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess  = false;
   features->storagePushConstant16               = false;
   features->storageInputOutput16                = false;
   features->multiview                           = true;
   features->multiviewGeometryShader             = false;
   features->multiviewTessellationShader         = false;
   features->variablePointersStorageBuffer       = true;
   features->variablePointers                    = true;
   features->protectedMemory                     = false;
   features->samplerYcbcrConversion              = true;
   features->shaderDrawParameters                = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge            = true;
   features->drawIndirectCount                   = true;
   features->storageBuffer8BitAccess             = false;
   features->uniformAndStorageBuffer8BitAccess   = false;
   features->storagePushConstant8                = false;
   features->shaderBufferInt64Atomics            = false;
   features->shaderSharedInt64Atomics            = false;
   features->shaderFloat16                       = true;
   features->shaderInt8                          = false;

   features->descriptorIndexing                                 = true;
   features->shaderInputAttachmentArrayDynamicIndexing          = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   features->shaderUniformBufferArrayNonUniformIndexing         = true;
   features->shaderSampledImageArrayNonUniformIndexing          = true;
   features->shaderStorageBufferArrayNonUniformIndexing         = true;
   features->shaderStorageImageArrayNonUniformIndexing          = true;
   features->shaderInputAttachmentArrayNonUniformIndexing       = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   features->descriptorBindingUniformBufferUpdateAfterBind      = true;
   features->descriptorBindingSampledImageUpdateAfterBind       = true;
   features->descriptorBindingStorageImageUpdateAfterBind       = true;
   features->descriptorBindingStorageBufferUpdateAfterBind      = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending          = true;
   features->descriptorBindingPartiallyBound                    = true;
   features->descriptorBindingVariableDescriptorCount           = true;
   features->runtimeDescriptorArray                             = true;

   features->samplerFilterMinmax                 =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout                   = true;
   features->imagelessFramebuffer                = true;
   features->uniformBufferStandardLayout         = true;
   features->shaderSubgroupExtendedTypes         = true;
   features->separateDepthStencilLayouts         = true;
   features->hostQueryReset                      = true;
   features->timelineSemaphore                   = true;
   features->bufferDeviceAddress                 = true;
   features->bufferDeviceAddressCaptureReplay    = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice      = false;
   features->vulkanMemoryModel                   = true;
   features->vulkanMemoryModelDeviceScope        = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex           = true;
   features->shaderOutputLayer                   = true;
   features->subgroupBroadcastDynamicId          = true;

   /* Vulkan 1.3 */
   features->robustImageAccess                   = true;
   features->inlineUniformBlock                  = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl        = true;
   features->privateData                         = true;
   features->shaderDemoteToHelperInvocation      = true;
   features->shaderTerminateInvocation           = true;
   features->subgroupSizeControl                 = true;
   features->computeFullSubgroups                = true;
   features->synchronization2                    = true;
   features->textureCompressionASTC_HDR          = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering                    = true;
   features->shaderIntegerDotProduct             = true;
   features->maintenance4                        = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_index_type_uint8 */
   features->indexTypeUint8 = true;
   /* VK_EXT_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode = false;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize = false;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable = true;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;
}

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

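   /* A wave is 64 fibers wide on a6xx ("threadsize_base"); parts that
    * support double threadsize run 128-wide waves, which is what we report
    * as the subgroup size.
    */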
   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}

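/* A conservative per-set descriptor bound assuming the smallest descriptor:
 * one A6XX_TEX_CONST_DWORDS-sized slot of 4-byte dwords per descriptor.
 */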
static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   p->conformanceVersion = (VkConformanceVersion) {
      .major = 1,
      .minor = 2,
      .subminor = 7,
      .patch = 1,
   };

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16         = true;
   p->shaderDenormPreserveFloat16            = false;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = false;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = false;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = false;
   p->shaderDenormPreserveFloat64            = false;
   p->shaderRoundingModeRTEFloat64           = false;
   p->shaderRoundingModeRTZFloat64           = false;
   p->shaderSignedZeroInfNanPreserveFloat64  = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
   p->shaderSampledImageArrayNonUniformIndexingNative    = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = false;
   p->quadDivergentImplicitLod                           = false;

   p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_RTS;

   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone  = false;
   p->independentResolve      = false;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   /* TODO move threadsize_base and max_waves to fd_dev_info and use them here */
   p->minSubgroupSize = 64; /* threadsize_base */
   p->maxSubgroupSize =
      pdevice->info->a6xx.supports_double_threadsize ? 128 : 64;
   p->maxComputeWorkgroupSubgroups = 16; /* max_waves */
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}

static void
tu_get_properties(struct tu_physical_device *pdevice,
                  struct vk_properties *props)
{
   /* Limits */
   props->maxImageDimension1D = (1 << 14);
   props->maxImageDimension2D = (1 << 14);
   props->maxImageDimension3D = (1 << 11);
   props->maxImageDimensionCube = (1 << 14);
   props->maxImageArrayLayers = (1 << 11);
   props->maxTexelBufferElements = 128 * 1024 * 1024;
   props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
   props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
   props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
   props->maxMemoryAllocationCount = UINT32_MAX;
   props->maxSamplerAllocationCount = 64 * 1024;
   props->bufferImageGranularity = 64;          /* A cache line */
   props->sparseAddressSpaceSize = 0;
   props->maxBoundDescriptorSets = pdevice->usable_sets;
   props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
   props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
   props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
   props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
   props->maxPerStageDescriptorInputAttachments = MAX_RTS;
   props->maxPerStageResources = max_descriptor_set_size;
   props->maxDescriptorSetSamplers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
   props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
   props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   props->maxDescriptorSetSampledImages = max_descriptor_set_size;
   props->maxDescriptorSetStorageImages = max_descriptor_set_size;
   props->maxDescriptorSetInputAttachments = MAX_RTS;
   props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
   props->maxVertexInputAttributeOffset = 4095;
   props->maxVertexInputBindingStride = 2048;
   props->maxVertexOutputComponents = 128;
   props->maxTessellationGenerationLevel = 64;
   props->maxTessellationPatchSize = 32;
   props->maxTessellationControlPerVertexInputComponents = 128;
   props->maxTessellationControlPerVertexOutputComponents = 128;
   props->maxTessellationControlPerPatchOutputComponents = 120;
   props->maxTessellationControlTotalOutputComponents = 4096;
   props->maxTessellationEvaluationInputComponents = 128;
   props->maxTessellationEvaluationOutputComponents = 128;
   props->maxGeometryShaderInvocations = 32;
   props->maxGeometryInputComponents = 64;
   props->maxGeometryOutputComponents = 128;
   props->maxGeometryOutputVertices = 256;
   props->maxGeometryTotalOutputComponents = 1024;
   props->maxFragmentInputComponents = 124;
   props->maxFragmentOutputAttachments = 8;
   props->maxFragmentDualSrcAttachments = 1;
   props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
   props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
   props->maxComputeWorkGroupCount[0] =
      props->maxComputeWorkGroupCount[1] =
      props->maxComputeWorkGroupCount[2] = 65535;
   props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ? 2048 : 1024;
   props->maxComputeWorkGroupSize[0] =
      props->maxComputeWorkGroupSize[1] =
      props->maxComputeWorkGroupSize[2] = 1024;
   props->subPixelPrecisionBits = 8;
   props->subTexelPrecisionBits = 8;
   props->mipmapPrecisionBits = 8;
   props->maxDrawIndexedIndexValue = UINT32_MAX;
   props->maxDrawIndirectCount = UINT32_MAX;
   props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
   props->maxSamplerAnisotropy = 16;
   props->maxViewports =
         (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   props->maxViewportDimensions[0] =
      props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
   props->viewportBoundsRange[0] = INT16_MIN;
   props->viewportBoundsRange[1] = INT16_MAX;
   props->viewportSubPixelBits = 8;
   props->minMemoryMapAlignment = 4096; /* A page */
   props->minTexelBufferOffsetAlignment = 64;
   props->minUniformBufferOffsetAlignment = 64;
   props->minStorageBufferOffsetAlignment = 4;
   props->minTexelOffset = -16;
   props->maxTexelOffset = 15;
   props->minTexelGatherOffset = -32;
   props->maxTexelGatherOffset = 31;
   props->minInterpolationOffset = -0.5;
   props->maxInterpolationOffset = 0.4375;
   props->subPixelInterpolationOffsetBits = 4;
   props->maxFramebufferWidth = (1 << 14);
   props->maxFramebufferHeight = (1 << 14);
   props->maxFramebufferLayers = (1 << 10);
   props->framebufferColorSampleCounts = sample_counts;
   props->framebufferDepthSampleCounts = sample_counts;
   props->framebufferStencilSampleCounts = sample_counts;
   props->framebufferNoAttachmentsSampleCounts = sample_counts;
   props->maxColorAttachments = MAX_RTS;
   props->sampledImageColorSampleCounts = sample_counts;
   props->sampledImageIntegerSampleCounts = sample_counts;
   props->sampledImageDepthSampleCounts = sample_counts;
   props->sampledImageStencilSampleCounts = sample_counts;
   props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
   props->maxSampleMaskWords = 1;
   props->timestampComputeAndGraphics = true;
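   /* One tick of the 19.2 MHz counter is 1e9 / 19.2e6 ≈ 52.08 ns. */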
   props->timestampPeriod = 1000000000.0 / 19200000.0; /* CP_ALWAYS_ON_COUNTER is fixed 19.2MHz */
   props->maxClipDistances = 8;
   props->maxCullDistances = 8;
   props->maxCombinedClipAndCullDistances = 8;
   props->discreteQueuePriorities = 2;
   props->pointSizeRange[0] = 1;
   props->pointSizeRange[1] = 4092;
   props->lineWidthRange[0] =
      props->lineWidthRange[1] = 1.0;
   props->pointSizeGranularity = 0.0625;
   props->lineWidthGranularity = 0.0;
   props->strictLines = true;
   props->standardSampleLocations = true;
   props->optimalBufferCopyOffsetAlignment = 128;
   props->optimalBufferCopyRowPitchAlignment = 128;
   props->nonCoherentAtomSize = 64;

   props->apiVersion =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
         TU_API_VERSION : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
   props->driverVersion = vk_get_driver_version();
   props->vendorID = 0x5143;
   props->deviceID = pdevice->dev_id.chip_id;
   props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;

   /* sparse properties */
   props->sparseResidencyStandard2DBlockShape = { 0 };
   props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
   props->sparseResidencyStandard3DBlockShape = { 0 };
   props->sparseResidencyAlignedMipSize = { 0 };
   props->sparseResidencyNonResidentStrict = { 0 };

   strcpy(props->deviceName, pdevice->name);
   memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);

   tu_get_physical_device_properties_1_1(pdevice, props);
   tu_get_physical_device_properties_1_2(pdevice, props);
   tu_get_physical_device_properties_1_3(pdevice, props);

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts = 0;
   if (pdevice->vk.supported_extensions.EXT_sample_locations) {
      props->sampleLocationSampleCounts =
         VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
   }
   props->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_EXT_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
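   /* Descriptors are multiples of an A6XX_TEX_CONST_DWORDS-dword slot
    * (4 bytes per dword): most take one slot, while combined image+sampler
    * descriptors and, with 16-bit storage, SSBO descriptors take two.
    */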
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageBufferDescriptorSize =
      pdevice->info->a6xx.storage_16bit ?
      2 * A6XX_TEX_CONST_DWORDS * 4 :
      A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageBufferDescriptorSize =
      props->storageBufferDescriptorSize;
   props->inputAttachmentDescriptorSize = TU_DEBUG(DYNAMIC) ?
      A6XX_TEX_CONST_DWORDS * 4 : 0;
   props->maxSamplerDescriptorBufferRange = ~0ull;
   props->maxResourceDescriptorBufferRange = ~0ull;
   props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
   props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
   props->descriptorBufferAddressSpaceSize = ~0ull;
   props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;

   /* VK_EXT_fragment_density_map */
   props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
   props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
   props->fragmentDensityInvocations = false;

   /* VK_KHR_maintenance5 */
   props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
   props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
   props->depthStencilSwizzleOneSupport = true;
   props->polygonModePointSize = true;
   props->nonStrictWideLinesUseParallelogram = false;
   props->nonStrictSinglePixelWideLinesUseParallelogram = false;
}

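/* NULL-terminated list of cache object types that can be deserialized when
 * pipeline cache data is imported.
 */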
static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shader_ops,
   &tu_nir_shaders_ops,
   NULL,
};

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (!fd_name) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "device (chip_id = %" PRIX64
                               ", gpu_id = %u) is unsupported",
                               device->dev_id.chip_id, device->dev_id.gpu_id);
   }

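   /* The kernel exposes names like "FDxxx"; turn those into a friendlier
    * "Turnip Adreno (TM) xxx", and pass anything else through unchanged.
    */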
   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s", &fd_name[2]);
   } else {
      device->name = vk_strdup(&instance->vk.alloc, fd_name,
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   const struct fd_dev_info info = fd_dev_info(&device->dev_id);
   if (!info.chip) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
   case 7: {
      device->dev_info = info;
      device->info = &device->dev_info;
      uint32_t depth_cache_size =
         device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
      uint32_t color_cache_size =
         (device->info->num_ccu *
          device->info->a6xx.sysmem_per_ccu_color_cache_size);
      uint32_t color_cache_size_gmem =
         color_cache_size /
         (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);

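      /* Lay out the fixed-function buffers in GMEM. In sysmem (bypass) mode
       * the depth CCU cache starts at offset 0 with the color CCU cache
       * right after it; in GMEM rendering mode the color cache (and, on
       * a7xx, the VPC attribute buffer) is carved from the top of GMEM,
       * leaving the rest usable for tile storage.
       */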
      device->ccu_depth_offset_bypass = 0;
      device->ccu_offset_bypass =
         device->ccu_depth_offset_bypass + depth_cache_size;

      if (device->info->a7xx.has_gmem_vpc_attr_buf) {
         device->vpc_attr_buf_size_bypass =
            device->info->a7xx.sysmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_bypass =
            device->ccu_offset_bypass + color_cache_size;

         device->vpc_attr_buf_size_gmem =
            device->info->a7xx.gmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_gmem =
            device->gmem_size -
            (device->vpc_attr_buf_size_gmem * device->info->num_ccu);

         device->ccu_offset_gmem =
            device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;

         device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
      } else {
         device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
         device->usable_gmem_size_gmem = device->gmem_size;
      }

      if (instance->reserve_descriptor_set) {
         device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
      } else {
         device->usable_sets = device->info->a6xx.max_sets;
         device->reserved_set_idx = -1;
      }
      break;
   }
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

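   /* Memory type 0 is always the coherent (uncached) mapping; cached
    * coherent and cached non-coherent types are appended only when the
    * kernel interface supports them.
    */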
   device->memory.type_count = 1;
   device->memory.types[0] =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

   if (device->has_cached_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   if (device->has_cached_non_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_name;

   get_device_extensions(device, &device->vk.supported_extensions);
   tu_get_features(device, &device->vk.supported_features);
   tu_get_properties(device, &device->vk.properties);

   device->vk.supported_sync_types = device->sync_types;

#if TU_HAS_SURFACE
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_name;
   }
#endif

1210    /* The gpu id is already embedded in the cache uuid, so the device name
1211     * plus the uuid hex string is enough to key the cache.
1212     */
1213    char buf[VK_UUID_SIZE * 2 + 1];
1214    mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
1215    device->vk.disk_cache = disk_cache_create(device->name, buf, 0);
1216 
1217    device->vk.pipeline_cache_import_ops = cache_import_ops;
1218 
1219    return VK_SUCCESS;
1220 
1221 fail_free_name:
1222    vk_free(&instance->vk.alloc, (void *)device->name);
1223    return result;
1224 }
1225 
1226 static void
1227 tu_physical_device_finish(struct tu_physical_device *device)
1228 {
1229 #if TU_HAS_SURFACE
1230    tu_wsi_finish(device);
1231 #endif
1232 
1233    close(device->local_fd);
1234    if (device->master_fd != -1)
1235       close(device->master_fd);
1236 
1237    disk_cache_destroy(device->vk.disk_cache);
1238    vk_free(&device->instance->vk.alloc, (void *)device->name);
1239 
1240    vk_physical_device_finish(&device->vk);
1241 }
1242 
1243 static void
1244 tu_destroy_physical_device(struct vk_physical_device *device)
1245 {
1246    tu_physical_device_finish((struct tu_physical_device *) device);
1247    vk_free(&device->instance->alloc, device);
1248 }
1249 
1250 static const driOptionDescription tu_dri_options[] = {
1251    DRI_CONF_SECTION_PERFORMANCE
1252       DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
1253       DRI_CONF_VK_KHR_PRESENT_WAIT(false)
1254       DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
1255       DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
1256       DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
1257    DRI_CONF_SECTION_END
1258 
1259    DRI_CONF_SECTION_DEBUG
1260       DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
1261       DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
1262       DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
1263       DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
1264    DRI_CONF_SECTION_END
1265 
1266    DRI_CONF_SECTION_MISCELLANEOUS
1267       DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
1268       DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
1269       DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
1270    DRI_CONF_SECTION_END
1271 };
1272 
1273 static void
1274 tu_init_dri_options(struct tu_instance *instance)
1275 {
1276    driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
1277                       ARRAY_SIZE(tu_dri_options));
1278    driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
1279                        instance->vk.app_info.app_name, instance->vk.app_info.app_version,
1280                        instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);
1281 
1282    instance->dont_care_as_load =
1283          driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
1284    instance->conservative_lrz =
1285          !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
1286    instance->reserve_descriptor_set =
1287          !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
1288    instance->allow_oob_indirect_ubo_loads =
1289          driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
1290 }
1291 
1292 VKAPI_ATTR VkResult VKAPI_CALL
1293 tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1294                   const VkAllocationCallbacks *pAllocator,
1295                   VkInstance *pInstance)
1296 {
1297    struct tu_instance *instance;
1298    VkResult result;
1299 
1300    tu_env_init();
1301 
1302    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1303 
1304    if (pAllocator == NULL)
1305       pAllocator = vk_default_allocator();
1306 
1307    instance = (struct tu_instance *) vk_zalloc(
1308       pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1309 
1310    if (!instance)
1311       return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1312 
1313    struct vk_instance_dispatch_table dispatch_table;
1314    vk_instance_dispatch_table_from_entrypoints(
1315       &dispatch_table, &tu_instance_entrypoints, true);
1316    vk_instance_dispatch_table_from_entrypoints(
1317       &dispatch_table, &wsi_instance_entrypoints, false);
1318 
1319    result = vk_instance_init(&instance->vk,
1320                              &tu_instance_extensions_supported,
1321                              &dispatch_table,
1322                              pCreateInfo, pAllocator);
1323    if (result != VK_SUCCESS) {
1324       vk_free(pAllocator, instance);
1325       return vk_error(NULL, result);
1326    }
1327 
1328    instance->vk.physical_devices.try_create_for_drm =
1329       tu_physical_device_try_create;
1330    instance->vk.physical_devices.enumerate = tu_enumerate_devices;
1331    instance->vk.physical_devices.destroy = tu_destroy_physical_device;
1332 
1333    if (TU_DEBUG(STARTUP))
1334       mesa_logi("Created an instance");
1335 
1336    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1337 
1338    tu_init_dri_options(instance);
1339 
1340    *pInstance = tu_instance_to_handle(instance);
1341 
1342 #ifdef HAVE_PERFETTO
1343    tu_perfetto_init();
1344 #endif
1345 
1346    util_gpuvis_init();
1347 
1348    return VK_SUCCESS;
1349 }
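
/* For reference, a minimal client-side sketch that reaches tu_CreateInstance
 * through the Vulkan loader (standard API usage, error handling omitted):
 *
 *    VkApplicationInfo app_info = {
 *       .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
 *       .apiVersion = VK_API_VERSION_1_3,
 *    };
 *    VkInstanceCreateInfo create_info = {
 *       .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
 *       .pApplicationInfo = &app_info,
 *    };
 *    VkInstance instance;
 *    VkResult res = vkCreateInstance(&create_info, NULL, &instance);
 */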
1350 
1351 VKAPI_ATTR void VKAPI_CALL
1352 tu_DestroyInstance(VkInstance _instance,
1353                    const VkAllocationCallbacks *pAllocator)
1354 {
1355    TU_FROM_HANDLE(tu_instance, instance, _instance);
1356 
1357    if (!instance)
1358       return;
1359 
1360    VG(VALGRIND_DESTROY_MEMPOOL(instance));
1361 
1362    driDestroyOptionCache(&instance->dri_options);
1363    driDestroyOptionInfo(&instance->available_dri_options);
1364 
1365    vk_instance_finish(&instance->vk);
1366    vk_free(&instance->vk.alloc, instance);
1367 }
1368 
1369 static const VkQueueFamilyProperties tu_queue_family_properties = {
1370    .queueFlags =
1371       VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
1372    .queueCount = 1,
1373    .timestampValidBits = 48,
1374    .minImageTransferGranularity = { 1, 1, 1 },
1375 };
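
/* With the 19.2 MHz always-on timer backing these timestamps (see
 * tu_device_ticks_to_ns() below), 48 valid bits give roughly
 * 2^48 / 19.2e6 ≈ 1.47e7 seconds, i.e. about 170 days before the counter
 * wraps.
 */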
1376 
1377 static void
1378 tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
1379                                                   VkQueueFamilyGlobalPriorityPropertiesKHR *props)
1380 {
1381    props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
1382    switch (props->priorityCount) {
1383    case 1:
1384       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1385       break;
1386    case 2:
1387       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1388       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1389       break;
1390    case 3:
1391       props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
1392       props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1393       props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
1394       break;
1395    default:
1396       unreachable("unexpected priority count");
1397       break;
1398    }
1399 }
1400 
1401 static int
1402 tu_physical_device_get_submitqueue_priority(const struct tu_physical_device *pdevice,
1403                                             VkQueueGlobalPriorityKHR global_priority,
1404                                             bool global_priority_query)
1405 {
1406    if (global_priority_query) {
1407       VkQueueFamilyGlobalPriorityPropertiesKHR props;
1408       tu_physical_device_get_global_priority_properties(pdevice, &props);
1409 
1410       bool valid = false;
1411       for (uint32_t i = 0; i < props.priorityCount; i++) {
1412          if (props.priorities[i] == global_priority) {
1413             valid = true;
1414             break;
1415          }
1416       }
1417 
1418       if (!valid)
1419          return -1;
1420    }
1421 
1422    /* Valid values are from 0 to (pdevice->submitqueue_priority_count - 1),
1423     * with 0 being the highest priority.  This matches what freedreno does.
1424     */
1425    int priority;
1426    if (global_priority == VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1427       priority = pdevice->submitqueue_priority_count / 2;
1428    else if (global_priority < VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR)
1429       priority = pdevice->submitqueue_priority_count - 1;
1430    else
1431       priority = 0;
1432 
1433    return priority;
1434 }
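
/* Worked example: on a kernel exposing submitqueue_priority_count == 3 the
 * mapping above works out to:
 *
 *    VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR    -> 3 - 1 = 2   (lowest)
 *    VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR -> 3 / 2 = 1
 *    VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR   -> 0           (highest)
 */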
1435 
1436 VKAPI_ATTR void VKAPI_CALL
1437 tu_GetPhysicalDeviceQueueFamilyProperties2(
1438    VkPhysicalDevice physicalDevice,
1439    uint32_t *pQueueFamilyPropertyCount,
1440    VkQueueFamilyProperties2 *pQueueFamilyProperties)
1441 {
1442    TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
1443 
1444    VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1445                           pQueueFamilyProperties, pQueueFamilyPropertyCount);
1446 
1447    vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
1448    {
1449       p->queueFamilyProperties = tu_queue_family_properties;
1450 
1451       vk_foreach_struct(ext, p->pNext) {
1452          switch (ext->sType) {
1453          case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
1454             VkQueueFamilyGlobalPriorityPropertiesKHR *props =
1455                (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
1456             tu_physical_device_get_global_priority_properties(pdevice, props);
1457             break;
1458          }
1459          default:
1460             break;
1461          }
1462       }
1463    }
1464 }
1465 
1466 uint64_t
1467 tu_get_system_heap_size(struct tu_physical_device *physical_device)
1468 {
1469    struct sysinfo info;
1470    sysinfo(&info);
1471 
1472    uint64_t total_ram = (uint64_t) info.totalram * (uint64_t) info.mem_unit;
1473 
1474    /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
1475     * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
1476     */
1477    uint64_t available_ram;
1478    if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
1479       available_ram = total_ram / 2;
1480    else
1481       available_ram = total_ram * 3 / 4;
1482 
1483    if (physical_device->va_size)
1484       available_ram = MIN2(available_ram, physical_device->va_size);
1485 
1486    return available_ram;
1487 }
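
/* Worked example: a board with 8 GiB of RAM advertises 8 * 3/4 = 6 GiB,
 * while a 4 GiB board advertises 2 GiB; either value is then clamped to the
 * GPU VA size when one is reported.
 */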
1488 
1489 static VkDeviceSize
1490 tu_get_budget_memory(struct tu_physical_device *physical_device)
1491 {
1492    uint64_t heap_size = physical_device->heap.size;
1493    uint64_t heap_used = physical_device->heap.used;
1494    uint64_t sys_available;
1495    ASSERTED bool has_available_memory =
1496       os_get_available_system_memory(&sys_available);
1497    assert(has_available_memory);
1498 
1499    if (physical_device->va_size)
1500       sys_available = MIN2(sys_available, physical_device->va_size);
1501 
1502    /*
1503     * Let's not incite the app to starve the system: report at most 90% of
1504     * available system memory.
1505     */
1506    uint64_t heap_available = sys_available * 9 / 10;
1507    return MIN2(heap_size, heap_used + heap_available);
1508 }
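
/* Worked example (hypothetical numbers): with heap_size = 6 GiB, heap_used =
 * 1 GiB and sys_available = 4 GiB, heap_available = 3.6 GiB and the budget
 * is MIN2(6, 1 + 3.6) = 4.6 GiB, i.e. the app may grow by at most 90% of
 * what the system currently has free.
 */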
1509 
1510 VKAPI_ATTR void VKAPI_CALL
1511 tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
1512                                       VkPhysicalDeviceMemoryProperties2 *props2)
1513 {
1514    TU_FROM_HANDLE(tu_physical_device, physical_device, pdev);
1515 
1516    VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
1517    props->memoryHeapCount = 1;
1518    props->memoryHeaps[0].size = physical_device->heap.size;
1519    props->memoryHeaps[0].flags = physical_device->heap.flags;
1520 
1521    props->memoryTypeCount = physical_device->memory.type_count;
1522    for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
1523       props->memoryTypes[i] = (VkMemoryType) {
1524          .propertyFlags = physical_device->memory.types[i],
1525          .heapIndex     = 0,
1526       };
1527    }
1528 
1529    vk_foreach_struct(ext, props2->pNext)
1530    {
1531       switch (ext->sType) {
1532       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1533          VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
1534             (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1535          memory_budget_props->heapUsage[0] = physical_device->heap.used;
1536          memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);
1537 
1538          /* The heapBudget and heapUsage values must be zero for array elements
1539           * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1540           */
1541          for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1542             memory_budget_props->heapBudget[i] = 0u;
1543             memory_budget_props->heapUsage[i] = 0u;
1544          }
1545          break;
1546       }
1547       default:
1548          break;
1549       }
1550    }
1551 }
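
/* A client reads the budget by chaining the EXT struct; a sketch of standard
 * VK_EXT_memory_budget usage:
 *
 *    VkPhysicalDeviceMemoryBudgetPropertiesEXT budget = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT,
 *    };
 *    VkPhysicalDeviceMemoryProperties2 props = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PROPERTIES_2,
 *       .pNext = &budget,
 *    };
 *    vkGetPhysicalDeviceMemoryProperties2(physical_device, &props);
 *    // budget.heapBudget[0] and budget.heapUsage[0] hold the values above
 */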
1552 
1553 static VkResult
1554 tu_queue_init(struct tu_device *device,
1555               struct tu_queue *queue,
1556               int idx,
1557               const VkDeviceQueueCreateInfo *create_info,
1558               bool global_priority_query)
1559 {
1560    const VkDeviceQueueGlobalPriorityCreateInfoKHR *priority_info =
1561       vk_find_struct_const(create_info->pNext,
1562             DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_KHR);
1563    const enum VkQueueGlobalPriorityKHR global_priority = priority_info ?
1564       priority_info->globalPriority : VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
1565 
1566    const int priority = tu_physical_device_get_submitqueue_priority(
1567          device->physical_device, global_priority, global_priority_query);
1568    if (priority < 0) {
1569       return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1570                                "invalid global priority");
1571    }
1572 
1573    VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info, idx);
1574    if (result != VK_SUCCESS)
1575       return result;
1576 
1577    queue->device = device;
1578    queue->priority = priority;
1579    queue->vk.driver_submit = tu_queue_submit;
1580 
1581    int ret = tu_drm_submitqueue_new(device, priority, &queue->msm_queue_id);
1582    if (ret)
1583       return vk_startup_errorf(device->instance, VK_ERROR_INITIALIZATION_FAILED,
1584                                "submitqueue create failed");
1585 
1586    queue->fence = -1;
1587 
1588    return VK_SUCCESS;
1589 }
1590 
1591 static void
1592 tu_queue_finish(struct tu_queue *queue)
1593 {
1594    vk_queue_finish(&queue->vk);
1595    tu_drm_submitqueue_close(queue->device, queue->msm_queue_id);
1596 }
1597 
1598 uint64_t
1599 tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
1600 {
1601    /* This is based on the 19.2MHz always-on rbbm timer.
1602     *
1603     * TODO: we should probably query this value from the kernel.
1604     */
1605    return ts * (1000000000 / 19200000);
1606 }
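
/* Note that 1000000000 / 19200000 truncates to 52 ns per tick (the exact
 * value is 52.083...), so converted times read about 0.16% low. A split
 * divide would be exact without overflowing 64 bits, e.g. (sketch):
 *
 *    uint64_t s = ts / 19200000;
 *    uint64_t r = ts % 19200000;
 *    return s * 1000000000 + r * 1000000000 / 19200000;
 */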
1607 
1608 struct u_trace_context *
1609 tu_device_get_u_trace(struct tu_device *device)
1610 {
1611    return &device->trace_context;
1612 }
1613 
1614 static void*
1615 tu_trace_create_ts_buffer(struct u_trace_context *utctx, uint32_t size)
1616 {
1617    struct tu_device *device =
1618       container_of(utctx, struct tu_device, trace_context);
1619 
1620    struct tu_bo *bo;
1621    tu_bo_init_new(device, &bo, size, TU_BO_ALLOC_NO_FLAGS, "trace");
1622 
1623    return bo;
1624 }
1625 
1626 static void
1627 tu_trace_destroy_ts_buffer(struct u_trace_context *utctx, void *timestamps)
1628 {
1629    struct tu_device *device =
1630       container_of(utctx, struct tu_device, trace_context);
1631    struct tu_bo *bo = (struct tu_bo *) timestamps;
1632 
1633    tu_bo_finish(device, bo);
1634 }
1635 
1636 template <chip CHIP>
1637 static void
1638 tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
1639                    unsigned idx, bool end_of_pipe)
1640 {
1641    struct tu_bo *bo = (struct tu_bo *) timestamps;
1642    struct tu_cs *ts_cs = (struct tu_cs *) cs;
1643 
1644    unsigned ts_offset = idx * sizeof(uint64_t);
1645 
1646    if (CHIP == A6XX) {
1647       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
1648       tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
1649                            CP_EVENT_WRITE_0_TIMESTAMP);
1650       tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1651       tu_cs_emit(ts_cs, 0x00000000);
1652    } else {
1653       tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
1654       tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
1655                                           .write_src = EV_WRITE_ALWAYSON,
1656                                           .write_dst = EV_DST_RAM,
1657                                           .write_enabled = true)
1658                            .value);
1659       tu_cs_emit_qw(ts_cs, bo->iova + ts_offset);
1660    }
1661 }
1662 
1663 static uint64_t
1664 tu_trace_read_ts(struct u_trace_context *utctx,
1665                  void *timestamps, unsigned idx, void *flush_data)
1666 {
1667    struct tu_device *device =
1668       container_of(utctx, struct tu_device, trace_context);
1669    struct tu_bo *bo = (struct tu_bo *) timestamps;
1670    struct tu_u_trace_submission_data *submission_data =
1671       (struct tu_u_trace_submission_data *) flush_data;
1672 
1673    /* Only need to stall on results for the first entry: */
1674    if (idx == 0) {
1675       tu_device_wait_u_trace(device, submission_data->syncobj);
1676    }
1677 
1678    if (tu_bo_map(device, bo) != VK_SUCCESS) {
1679       return U_TRACE_NO_TIMESTAMP;
1680    }
1681 
1682    uint64_t *ts = (uint64_t *) bo->map;
1683 
1684    /* Don't translate the no-timestamp marker: */
1685    if (ts[idx] == U_TRACE_NO_TIMESTAMP)
1686       return U_TRACE_NO_TIMESTAMP;
1687 
1688    return tu_device_ticks_to_ns(device, ts[idx]);
1689 }
1690 
1691 static void
1692 tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
1693 {
1694    struct tu_device *device =
1695       container_of(utctx, struct tu_device, trace_context);
1696    struct tu_u_trace_submission_data *submission_data =
1697       (struct tu_u_trace_submission_data *) flush_data;
1698 
1699    tu_u_trace_submission_data_finish(device, submission_data);
1700 }
1701 
1702 void
1703 tu_copy_timestamp_buffer(struct u_trace_context *utctx, void *cmdstream,
1704                          void *ts_from, uint32_t from_offset,
1705                          void *ts_to, uint32_t to_offset,
1706                          uint32_t count)
1707 {
1708    struct tu_cs *cs = (struct tu_cs *) cmdstream;
1709    struct tu_bo *bo_from = (struct tu_bo *) ts_from;
1710    struct tu_bo *bo_to = (struct tu_bo *) ts_to;
1711 
1712    tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
1713    tu_cs_emit(cs, count * sizeof(uint64_t) / sizeof(uint32_t));
1714    tu_cs_emit_qw(cs, bo_from->iova + from_offset * sizeof(uint64_t));
1715    tu_cs_emit_qw(cs, bo_to->iova + to_offset * sizeof(uint64_t));
1716 }
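
/* CP_MEMCPY takes a dword count followed by 64-bit source and destination
 * addresses, so e.g. copying count = 4 timestamps moves 4 * 8 / 4 = 8 dwords
 * between the 64-bit-aligned offsets computed above.
 */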
1717 
1718 /* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
1719  * that ignore tracepoints at the beginning/end that are part of a
1720  * suspend/resume chain.
1721  */
1722 static struct u_trace_iterator
1723 tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
1724 {
1725    switch (cmdbuf->state.suspend_resume) {
1726    case SR_IN_PRE_CHAIN:
1727       return cmdbuf->trace_renderpass_end;
1728    case SR_AFTER_PRE_CHAIN:
1729    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1730       return cmdbuf->pre_chain.trace_renderpass_end;
1731    default:
1732       return u_trace_begin_iterator(&cmdbuf->trace);
1733    }
1734 }
1735 
1736 static struct u_trace_iterator
1737 tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
1738 {
1739    switch (cmdbuf->state.suspend_resume) {
1740    case SR_IN_PRE_CHAIN:
1741       return cmdbuf->trace_renderpass_end;
1742    case SR_IN_CHAIN:
1743    case SR_IN_CHAIN_AFTER_PRE_CHAIN:
1744       return cmdbuf->trace_renderpass_start;
1745    default:
1746       return u_trace_end_iterator(&cmdbuf->trace);
1747    }
1748 }
1749 VkResult
1750 tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs** cs,
1751                             struct u_trace **trace_copy)
1752 {
1753    *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
1754                                     sizeof(struct tu_cs), 8,
1755                                     VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1756 
1757    if (*cs == NULL) {
1758       return VK_ERROR_OUT_OF_HOST_MEMORY;
1759    }
1760 
1761    tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
1762               list_length(&cmdbuf->trace.trace_chunks) * 6 + 3, "trace copy timestamp cs");
1763 
1764    tu_cs_begin(*cs);
1765 
1766    tu_cs_emit_wfi(*cs);
1767    tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);
1768 
1769    *trace_copy = (struct u_trace *) vk_zalloc(
1770       &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
1771       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1772 
1773    if (*trace_copy == NULL) {
1774       return VK_ERROR_OUT_OF_HOST_MEMORY;
1775    }
1776 
1777    u_trace_init(*trace_copy, cmdbuf->trace.utctx);
1778    u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
1779                         tu_cmd_end_iterator(cmdbuf),
1780                         *trace_copy, *cs,
1781                         tu_copy_timestamp_buffer);
1782 
1783    tu_cs_emit_wfi(*cs);
1784 
1785    tu_cs_end(*cs);
1786 
1787    return VK_SUCCESS;
1788 }
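
/* The initial CS size above appears to budget 6 dwords per trace chunk (the
 * CP_MEMCPY header plus 5 payload dwords emitted by
 * tu_copy_timestamp_buffer()) plus 3 dwords for the WFI / CP_WAIT_FOR_ME /
 * WFI packets around the copies; TU_CS_MODE_GROW absorbs any excess.
 */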
1789 
1790 VkResult
1791 tu_u_trace_submission_data_create(
1792    struct tu_device *device,
1793    struct tu_cmd_buffer **cmd_buffers,
1794    uint32_t cmd_buffer_count,
1795    struct tu_u_trace_submission_data **submission_data)
1796 {
1797    *submission_data = (struct tu_u_trace_submission_data *)
1798       vk_zalloc(&device->vk.alloc,
1799                 sizeof(struct tu_u_trace_submission_data), 8,
1800                 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1801 
1802    if (!(*submission_data)) {
1803       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1804    }
1805 
1806    struct tu_u_trace_submission_data *data = *submission_data;
1807 
1808    data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
1809       &device->vk.alloc,
1810       cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
1811       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1812 
1813    if (!data->cmd_trace_data) {
1814       goto fail;
1815    }
1816 
1817    data->cmd_buffer_count = cmd_buffer_count;
1818    data->last_buffer_with_tracepoints = -1;
1819 
1820    for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
1821       struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];
1822 
1823       if (!u_trace_has_points(&cmdbuf->trace))
1824          continue;
1825 
1826       data->last_buffer_with_tracepoints = i;
1827 
1828       if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
1829          /* A single command buffer could be submitted several times, but the
1830           * timestamp iova addresses are already baked in and trace points are
1831           * single-use. Therefore we must copy the trace points and create a
1832           * new timestamp buffer on every submit of a reusable command buffer.
1833           */
1834          if (tu_create_copy_timestamp_cs(cmdbuf,
1835                &data->cmd_trace_data[i].timestamp_copy_cs,
1836                &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
1837             goto fail;
1838          }
1839 
1840          assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
1841       } else {
1842          data->cmd_trace_data[i].trace = &cmdbuf->trace;
1843       }
1844    }
1845 
1846    assert(data->last_buffer_with_tracepoints != -1);
1847 
1848    return VK_SUCCESS;
1849 
1850 fail:
1851    tu_u_trace_submission_data_finish(device, data);
1852    *submission_data = NULL;
1853 
1854    return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1855 }
1856 
1857 void
1858 tu_u_trace_submission_data_finish(
1859    struct tu_device *device,
1860    struct tu_u_trace_submission_data *submission_data)
1861 {
1862    for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
1863       /* Only free the trace if we had to create a copy of it */
1864       struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
1865       if (cmd_data->timestamp_copy_cs) {
1866          tu_cs_finish(cmd_data->timestamp_copy_cs);
1867          vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);
1868 
1869          u_trace_fini(cmd_data->trace);
1870          vk_free(&device->vk.alloc, cmd_data->trace);
1871       }
1872    }
1873 
1874    if (submission_data->kgsl_timestamp_bo.bo) {
1875       mtx_lock(&device->kgsl_profiling_mutex);
1876       tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
1877                         &submission_data->kgsl_timestamp_bo);
1878       mtx_unlock(&device->kgsl_profiling_mutex);
1879    }
1880 
1881    vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
1882    vk_free(&device->vk.alloc, submission_data->syncobj);
1883    vk_free(&device->vk.alloc, submission_data);
1884 }
1885 
1886 enum tu_reg_stomper_flags
1887 {
1888    TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
1889    TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
1890    TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
1891 };
1892 
1893 /* See freedreno.rst for usage tips */
1894 static const struct debug_named_value tu_reg_stomper_options[] = {
1895    { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
1896      "By default the range specifies the regs to stomp, with 'inverse' it "
1897      "specifies the regs NOT to stomp" },
1898    { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
1899      "Stomp regs at the start of a cmdbuf" },
1900    { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
1901      "Stomp regs before a renderpass" },
1902    { NULL, 0 }
1903 };
1904 
1905 template <chip CHIP>
1906 static inline void
1907 tu_cs_dbg_stomp_regs(struct tu_cs *cs,
1908                      bool is_rp_blit,
1909                      uint32_t first_reg,
1910                      uint32_t last_reg,
1911                      bool inverse)
1912 {
1913    const uint16_t *regs = NULL;
1914    size_t count = 0;
1915 
1916    if (is_rp_blit) {
1917       regs = &RP_BLIT_REGS<CHIP>[0];
1918       count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
1919    } else {
1920       regs = &CMD_REGS<CHIP>[0];
1921       count = ARRAY_SIZE(CMD_REGS<CHIP>);
1922    }
1923 
1924    for (size_t i = 0; i < count; i++) {
1925       if (inverse) {
1926          if (regs[i] >= first_reg && regs[i] <= last_reg)
1927             continue;
1928       } else {
1929          if (regs[i] < first_reg || regs[i] > last_reg)
1930             continue;
1931       }
1932 
1933       if (fd_reg_stomp_allowed(CHIP, regs[i]))
1934          tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
1935    }
1936 }
1937 
1938 static void
1939 tu_init_dbg_reg_stomper(struct tu_device *device)
1940 {
1941    const char *stale_reg_range_str =
1942       os_get_option("TU_DEBUG_STALE_REGS_RANGE");
1943    if (!stale_reg_range_str)
1944       return;
1945 
1946    uint32_t first_reg, last_reg;
1947 
1948    if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
1949       mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
1950       return;
1951    }
1952 
1953    uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
1954                                                  tu_reg_stomper_options,
1955                                                  TU_DEBUG_REG_STOMP_CMDBUF);
1956 
1957    struct tu_cs *cmdbuf_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
1958    tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
1959               "cmdbuf reg stomp cs");
1960    tu_cs_begin(cmdbuf_cs);
1961 
1962    struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
1963    tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
1964    tu_cs_begin(rp_cs);
1965 
1966    bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;
1967    TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
1968    TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
1969 
1970    tu_cs_end(cmdbuf_cs);
1971    tu_cs_end(rp_cs);
1972 
1973    device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
1974    device->dbg_renderpass_stomp_cs = rp_cs;
1975 }
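
/* Example invocation (with a hypothetical register range): stomp everything
 * outside 0x8600..0x8e00 before each renderpass:
 *
 *    TU_DEBUG_STALE_REGS_RANGE=0x8600,0x8e00 \
 *    TU_DEBUG_STALE_REGS_FLAGS=inverse,renderpass ./app
 */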
1976 
1977 /* It is unknown what this workaround is for and what it fixes. */
1978 static VkResult
1979 tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
1980 {
1981    struct tu_cs *cs;
1982 
1983    if (!(device->cmdbuf_start_a725_quirk_cs =
1984             (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
1985       return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1986                                "OOM");
1987    }
1988 
1989    if (!(device->cmdbuf_start_a725_quirk_entry =
1990             (struct tu_cs_entry *) calloc(1, sizeof(struct tu_cs_entry)))) {
1991       free(device->cmdbuf_start_a725_quirk_cs);
1992       return vk_startup_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
1993                                "OOM");
1994    }
1995 
1996    cs = device->cmdbuf_start_a725_quirk_cs;
1997    tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 57, "a725 workaround cs");
1998 
1999    struct tu_cs shader_cs;
2000    tu_cs_begin_sub_stream(cs, 10, &shader_cs);
2001 
2002    uint32_t raw_shader[] = {
2003       0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
2004       0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
2005       0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
2006       0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
2007       0x00000000, 0x03000000, // end
2008    };
2009 
2010    tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
2011    struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(cs, &shader_cs);
2012    uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;
2013 
2014    struct tu_cs sub_cs;
2015    tu_cs_begin_sub_stream(cs, 47, &sub_cs);
2016 
2017    tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
2018             .vs_state = true, .hs_state = true, .ds_state = true,
2019             .gs_state = true, .fs_state = true, .gfx_ibo = true,
2020             .cs_bindless = 0xff, .gfx_bindless = 0xff));
2021    tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
2022             .constlen = 4,
2023             .enabled = true));
2024    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
2025    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
2026             .threadmode = MULTI,
2027             .threadsize = THREAD128,
2028             .mergedregs = true));
2029    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
2030    tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
2031                      HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
2032                      HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
2033    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
2034    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
2035    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
2036    tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_CNTL_1(
2037             .linearlocalidregid = regid(63, 0),
2038             .threadsize = THREAD128,
2039             .unk11 = true,
2040             .unk22 = true,
2041             .yalign = CS_YALIGN_1));
2042    tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
2043             .wgidconstid = regid(51, 3),
2044             .wgsizeconstid = regid(48, 0),
2045             .wgoffsetconstid = regid(63, 0),
2046             .localidregid = regid(63, 0)));
2047    tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
2048             .linearlocalidregid = regid(63, 0),
2049             .threadsize = THREAD128,
2050             .unk15 = true));
2051    tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));
2052 
2053    tu_cs_emit_regs(&sub_cs,
2054                   HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
2055                                           .localsizex = 255,
2056                                           .localsizey = 1,
2057                                           .localsizez = 1),
2058                   HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
2059                   HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
2060                   HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
2061                   HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
2062                   HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
2063                   HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
2064    tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LOCAL_SIZE(
2065             .localsizex = 255,
2066             .localsizey = 0,
2067             .localsizez = 0));
2068    tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
2069    tu_cs_emit(&sub_cs, 0);
2070    tu_cs_emit_qw(&sub_cs, shader_iova);
2071 
2072    tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2073    tu_cs_emit(&sub_cs, 0x00000000);
2074    tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2075    tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2076    tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2077 
2078    *device->cmdbuf_start_a725_quirk_entry = tu_cs_end_sub_stream(cs, &sub_cs);
2079 
2080    return VK_SUCCESS;
2081 }
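
/* Net effect: at the start of a command buffer the quirk dispatches a
 * throwaway compute grid (12 workgroups running the four mul.f instructions
 * above); which a725 issue this settles is unknown, as noted earlier.
 */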
2082 
2083 VKAPI_ATTR VkResult VKAPI_CALL
2084 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2085                 const VkDeviceCreateInfo *pCreateInfo,
2086                 const VkAllocationCallbacks *pAllocator,
2087                 VkDevice *pDevice)
2088 {
2089    TU_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2090    VkResult result;
2091    struct tu_device *device;
2092    bool custom_border_colors = false;
2093    bool perf_query_pools = false;
2094    bool robust_buffer_access2 = false;
2095    bool border_color_without_format = false;
2096    bool global_priority_query = false;
2097 
2098    vk_foreach_struct_const(ext, pCreateInfo->pNext) {
2099       switch (ext->sType) {
2100       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
2101          const VkPhysicalDeviceCustomBorderColorFeaturesEXT
2102             *border_color_features =
2103                (const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext;
2104          custom_border_colors = border_color_features->customBorderColors;
2105          border_color_without_format =
2106             border_color_features->customBorderColorWithoutFormat;
2107          break;
2108       }
2109       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PERFORMANCE_QUERY_FEATURES_KHR: {
2110          const VkPhysicalDevicePerformanceQueryFeaturesKHR *feature =
2111             (VkPhysicalDevicePerformanceQueryFeaturesKHR *)ext;
2112          perf_query_pools = feature->performanceCounterQueryPools;
2113          break;
2114       }
2115       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
2116          VkPhysicalDeviceRobustness2FeaturesEXT *features =
2117             (VkPhysicalDeviceRobustness2FeaturesEXT *) ext;
2118          robust_buffer_access2 = features->robustBufferAccess2;
2119          break;
2120       }
2121       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_KHR: {
2122          VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *features =
2123             (VkPhysicalDeviceGlobalPriorityQueryFeaturesKHR *) ext;
2124          global_priority_query = features->globalPriorityQuery;
2125          break;
2126       }
2127       default:
2128          break;
2129       }
2130    }
2131 
2132    device = (struct tu_device *) vk_zalloc2(
2133       &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2134       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2135    if (!device)
2136       return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2137 
2138    struct vk_device_dispatch_table dispatch_table;
2139    vk_device_dispatch_table_from_entrypoints(
2140       &dispatch_table, &tu_device_entrypoints, true);
2141 
2142    switch (fd_dev_gen(&physical_device->dev_id)) {
2143    case 6:
2144       vk_device_dispatch_table_from_entrypoints(
2145          &dispatch_table, &tu_device_entrypoints_a6xx, false);
2146       break;
2147    case 7:
2148       vk_device_dispatch_table_from_entrypoints(
2149          &dispatch_table, &tu_device_entrypoints_a7xx, false);
2150    }
2151 
2152    vk_device_dispatch_table_from_entrypoints(
2153       &dispatch_table, &wsi_device_entrypoints, false);
2154 
2155    const struct vk_device_entrypoint_table *knl_device_entrypoints =
2156          physical_device->instance->knl->device_entrypoints;
2157    if (knl_device_entrypoints) {
2158       vk_device_dispatch_table_from_entrypoints(
2159          &dispatch_table, knl_device_entrypoints, false);
2160    }
2161 
2162    result = vk_device_init(&device->vk, &physical_device->vk,
2163                            &dispatch_table, pCreateInfo, pAllocator);
2164    if (result != VK_SUCCESS) {
2165       vk_free(&device->vk.alloc, device);
2166       return vk_startup_errorf(physical_device->instance, result,
2167                                "vk_device_init failed");
2168    }
2169 
2170    device->instance = physical_device->instance;
2171    device->physical_device = physical_device;
2172    device->device_idx = device->physical_device->device_count++;
2173 
2174    result = tu_drm_device_init(device);
2175    if (result != VK_SUCCESS) {
2176       vk_free(&device->vk.alloc, device);
2177       return result;
2178    }
2179 
2180    device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2181    device->vk.check_status = tu_device_check_status;
2182 
2183    mtx_init(&device->bo_mutex, mtx_plain);
2184    mtx_init(&device->pipeline_mutex, mtx_plain);
2185    mtx_init(&device->autotune_mutex, mtx_plain);
2186    mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2187    u_rwlock_init(&device->dma_bo_lock);
2188    pthread_mutex_init(&device->submit_mutex, NULL);
2189 
2190    if (physical_device->has_set_iova) {
2191       mtx_init(&device->vma_mutex, mtx_plain);
2192       util_vma_heap_init(&device->vma, physical_device->va_start,
2193                          ROUND_DOWN_TO(physical_device->va_size, 4096));
2194    }
2195 
2196    if (TU_DEBUG(BOS))
2197       device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2198 
2199    /* kgsl is not a drm device: */
2200    if (!is_kgsl(physical_device->instance))
2201       vk_device_set_drm_fd(&device->vk, device->fd);
2202 
2203    struct tu6_global *global = NULL;
2204    uint32_t global_size = sizeof(struct tu6_global);
2205    struct vk_pipeline_cache_create_info pcc_info = { };
2206 
2207    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2208       const VkDeviceQueueCreateInfo *queue_create =
2209          &pCreateInfo->pQueueCreateInfos[i];
2210       uint32_t qfi = queue_create->queueFamilyIndex;
2211       device->queues[qfi] = (struct tu_queue *) vk_alloc(
2212          &device->vk.alloc,
2213          queue_create->queueCount * sizeof(struct tu_queue), 8,
2214          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2215       if (!device->queues[qfi]) {
2216          result = vk_startup_errorf(physical_device->instance,
2217                                     VK_ERROR_OUT_OF_HOST_MEMORY,
2218                                     "OOM");
2219          goto fail_queues;
2220       }
2221 
2222       memset(device->queues[qfi], 0,
2223              queue_create->queueCount * sizeof(struct tu_queue));
2224 
2225       device->queue_count[qfi] = queue_create->queueCount;
2226 
2227       for (unsigned q = 0; q < queue_create->queueCount; q++) {
2228          result = tu_queue_init(device, &device->queues[qfi][q], q,
2229                                 queue_create, global_priority_query);
2230          if (result != VK_SUCCESS) {
2231             device->queue_count[qfi] = q;
2232             goto fail_queues;
2233          }
2234       }
2235    }
2236 
2237    {
2238       struct ir3_compiler_options ir3_options = {
2239          .robust_buffer_access2 = robust_buffer_access2,
2240          .push_ubo_with_preamble = true,
2241          .disable_cache = true,
2242          .bindless_fb_read_descriptor = -1,
2243          .bindless_fb_read_slot = -1,
2244          .storage_16bit = physical_device->info->a6xx.storage_16bit,
2245          .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2246       };
2247       device->compiler = ir3_compiler_create(
2248          NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2249    }
2250    if (!device->compiler) {
2251       result = vk_startup_errorf(physical_device->instance,
2252                                  VK_ERROR_INITIALIZATION_FAILED,
2253                                  "failed to initialize ir3 compiler");
2254       goto fail_queues;
2255    }
2256 
2257    /* Initialize sparse array for refcounting imported BOs */
2258    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2259 
2260    if (physical_device->has_set_iova) {
2261       STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2262       if (!u_vector_init(&device->zombie_vmas, 64,
2263                          sizeof(struct tu_zombie_vma))) {
2264          result = vk_startup_errorf(physical_device->instance,
2265                                     VK_ERROR_INITIALIZATION_FAILED,
2266                                     "zombie_vmas create failed");
2267          goto fail_free_zombie_vma;
2268       }
2269    }
2270 
2271    /* Initial sizes; these will grow if there is overflow. */
2272    device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2273    device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2274 
2275    if (custom_border_colors)
2276       global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
2277 
2278    tu_bo_suballocator_init(
2279       &device->pipeline_suballoc, device, 128 * 1024,
2280       (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY | TU_BO_ALLOC_ALLOW_DUMP), "pipeline_suballoc");
2281    tu_bo_suballocator_init(&device->autotune_suballoc, device,
2282                            128 * 1024, TU_BO_ALLOC_NO_FLAGS, "autotune_suballoc");
2283    if (is_kgsl(physical_device->instance)) {
2284       tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2285                               128 * 1024, TU_BO_ALLOC_NO_FLAGS, "kgsl_profiling_suballoc");
2286    }
2287 
2288    result = tu_bo_init_new(device, &device->global_bo, global_size,
2289                            TU_BO_ALLOC_ALLOW_DUMP, "global");
2290    if (result != VK_SUCCESS) {
2291       vk_startup_errorf(device->instance, result, "BO init");
2292       goto fail_global_bo;
2293    }
2294 
2295    result = tu_bo_map(device, device->global_bo);
2296    if (result != VK_SUCCESS) {
2297       vk_startup_errorf(device->instance, result, "BO map");
2298       goto fail_global_bo_map;
2299    }
2300 
2301    global = (struct tu6_global *)device->global_bo->map;
2302    device->global_bo_map = global;
2303    tu_init_clear_blit_shaders(device);
2304 
2305    result = tu_init_empty_shaders(device);
2306    if (result != VK_SUCCESS) {
2307       vk_startup_errorf(device->instance, result, "empty shaders");
2308       goto fail_empty_shaders;
2309    }
2310 
2311    global->predicate = 0;
2312    global->vtx_stats_query_not_running = 1;
2313    global->dbg_one = (uint32_t)-1;
2314    global->dbg_gmem_total_loads = 0;
2315    global->dbg_gmem_taken_loads = 0;
2316    global->dbg_gmem_total_stores = 0;
2317    global->dbg_gmem_taken_stores = 0;
2318    for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2319       VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2320       tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2321                             vk_border_color_is_int((VkBorderColor) i));
2322    }
2323 
2324    /* initialize to ones so ffs can be used to find unused slots */
2325    BITSET_ONES(device->custom_border_color);
2326 
2327    result = tu_init_dynamic_rendering(device);
2328    if (result != VK_SUCCESS) {
2329       vk_startup_errorf(device->instance, result, "dynamic rendering");
2330       goto fail_dynamic_rendering;
2331    }
2332 
2333    device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2334                                                 NULL);
2335    if (!device->mem_cache) {
2336       result = VK_ERROR_OUT_OF_HOST_MEMORY;
2337       vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2338       goto fail_pipeline_cache;
2339    }
2340 
2341    if (perf_query_pools) {
2342       /* Prepare command streams that set the pass index in PERF_CNTRS_REG
2343        * to each value from 0 to 31. One of them is picked up at command
2344        * submit time when the perf query is executed.
2345        */
2346       struct tu_cs *cs;
2347 
2348       if (!(device->perfcntrs_pass_cs =
2349                (struct tu_cs *) calloc(1, sizeof(struct tu_cs)))) {
2350          result = vk_startup_errorf(device->instance,
2351                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2352          goto fail_perfcntrs_pass_alloc;
2353       }
2354 
2355       device->perfcntrs_pass_cs_entries =
2356          (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2357       if (!device->perfcntrs_pass_cs_entries) {
2358          result = vk_startup_errorf(device->instance,
2359                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2360          goto fail_perfcntrs_pass_entries_alloc;
2361       }
2362 
2363       cs = device->perfcntrs_pass_cs;
2364       tu_cs_init(cs, device, TU_CS_MODE_SUB_STREAM, 96, "perfcntrs cs");
2365 
2366       for (unsigned i = 0; i < 32; i++) {
2367          struct tu_cs sub_cs;
2368 
2369          result = tu_cs_begin_sub_stream(cs, 3, &sub_cs);
2370          if (result != VK_SUCCESS) {
2371             vk_startup_errorf(device->instance, result,
2372                   "failed to allocate commands streams");
2373             goto fail_prepare_perfcntrs_pass_cs;
2374          }
2375 
2376          tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2377          tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2378 
2379          device->perfcntrs_pass_cs_entries[i] = tu_cs_end_sub_stream(cs, &sub_cs);
2380       }
2381    }
2382 
2383    if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2384          result = tu_init_cmdbuf_start_a725_quirk(device);
2385          if (result != VK_SUCCESS)
2386             goto fail_a725_workaround;
2387    }
2388 
2389    tu_init_dbg_reg_stomper(device);
2390 
2391    /* Initialize a condition variable for the timeline semaphore */
2392    pthread_condattr_t condattr;
2393    if (pthread_condattr_init(&condattr) != 0) {
2394       result = vk_startup_errorf(physical_device->instance,
2395                                  VK_ERROR_INITIALIZATION_FAILED,
2396                                  "pthread condattr init");
2397       goto fail_timeline_cond;
2398    }
2399    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2400       pthread_condattr_destroy(&condattr);
2401       result = vk_startup_errorf(physical_device->instance,
2402                                  VK_ERROR_INITIALIZATION_FAILED,
2403                                  "pthread condattr clock setup");
2404       goto fail_timeline_cond;
2405    }
2406    if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2407       pthread_condattr_destroy(&condattr);
2408       result = vk_startup_errorf(physical_device->instance,
2409                                  VK_ERROR_INITIALIZATION_FAILED,
2410                                  "pthread cond init");
2411       goto fail_timeline_cond;
2412    }
2413    pthread_condattr_destroy(&condattr);
2414 
2415    result = tu_autotune_init(&device->autotune, device);
2416    if (result != VK_SUCCESS) {
2417       goto fail_timeline_cond;
2418    }
2419 
2420    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2421       mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2422 
2423    mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2424    mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2425 
2426    mtx_init(&device->mutex, mtx_plain);
2427 
2428    device->use_z24uint_s8uint =
2429       physical_device->info->a6xx.has_z24uint_s8uint &&
2430       !border_color_without_format;
2431    device->use_lrz =
2432       !TU_DEBUG(NOLRZ) && device->physical_device->info->chip == 6;
2433 
2434    tu_gpu_tracepoint_config_variable();
2435 
2436    device->submit_count = 0;
2437    u_trace_context_init(&device->trace_context, device,
2438                      tu_trace_create_ts_buffer,
2439                      tu_trace_destroy_ts_buffer,
2440                      TU_CALLX(device, tu_trace_record_ts),
2441                      tu_trace_read_ts,
2442                      tu_trace_delete_flush_data);
2443 
2444    tu_breadcrumbs_init(device);
2445 
2446    if (FD_RD_DUMP(ENABLE)) {
2447       struct vk_app_info *app_info = &device->instance->vk.app_info;
2448       const char *app_name_str = app_info->app_name ?
2449          app_info->app_name : util_get_process_name();
2450       const char *engine_name_str = app_info->engine_name ?
2451          app_info->engine_name : "unknown-engine";
2452 
2453       char app_name[64];
2454       snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2455 
2456       char engine_name[32];
2457       snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2458 
2459       char output_name[128];
2460       snprintf(output_name, sizeof(output_name), "tu_%s.%s_device%u",
2461                app_name, engine_name, device->device_idx);
2462 
2463       fd_rd_output_init(&device->rd_output, output_name);
2464    }
2465 
2466    *pDevice = tu_device_to_handle(device);
2467    return VK_SUCCESS;
2468 
2469 fail_timeline_cond:
2470    if (device->cmdbuf_start_a725_quirk_entry) {
2471       free(device->cmdbuf_start_a725_quirk_entry);
2472       tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2473       free(device->cmdbuf_start_a725_quirk_cs);
2474    }
2475 fail_a725_workaround:
2476 fail_prepare_perfcntrs_pass_cs:
2477    free(device->perfcntrs_pass_cs_entries);
2478    tu_cs_finish(device->perfcntrs_pass_cs);
2479 fail_perfcntrs_pass_entries_alloc:
2480    free(device->perfcntrs_pass_cs);
2481 fail_perfcntrs_pass_alloc:
2482    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2483 fail_pipeline_cache:
2484    tu_destroy_dynamic_rendering(device);
2485 fail_dynamic_rendering:
2486    tu_destroy_empty_shaders(device);
2487 fail_empty_shaders:
2488    tu_destroy_clear_blit_shaders(device);
2489 fail_global_bo_map:
2490    tu_bo_finish(device, device->global_bo);
2491    vk_free(&device->vk.alloc, device->bo_list);
2492 fail_global_bo:
2493    ir3_compiler_destroy(device->compiler);
2494    util_sparse_array_finish(&device->bo_map);
2495    if (physical_device->has_set_iova)
2496       util_vma_heap_finish(&device->vma);
2497 fail_free_zombie_vma:
2498    u_vector_finish(&device->zombie_vmas);
2499 fail_queues:
2500    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2501       for (unsigned q = 0; q < device->queue_count[i]; q++)
2502          tu_queue_finish(&device->queues[i][q]);
2503       if (device->queues[i])
2504          vk_free(&device->vk.alloc, device->queues[i]);
2505    }
2506 
2507    u_rwlock_destroy(&device->dma_bo_lock);
2508    vk_device_finish(&device->vk);
2509    vk_free(&device->vk.alloc, device);
2510    return result;
2511 }
2512 
2513 VKAPI_ATTR void VKAPI_CALL
2514 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2515 {
2516    TU_FROM_HANDLE(tu_device, device, _device);
2517 
2518    if (!device)
2519       return;
2520 
2521    if (FD_RD_DUMP(ENABLE))
2522       fd_rd_output_fini(&device->rd_output);
2523 
2524    tu_breadcrumbs_finish(device);
2525 
2526    u_trace_context_fini(&device->trace_context);
2527 
2528    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2529       if (device->scratch_bos[i].initialized)
2530          tu_bo_finish(device, device->scratch_bos[i].bo);
2531    }
2532 
2533    if (device->fiber_pvtmem_bo.bo)
2534       tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2535 
2536    if (device->wave_pvtmem_bo.bo)
2537       tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2538 
2539    tu_destroy_clear_blit_shaders(device);
2540 
2541    tu_destroy_empty_shaders(device);
2542 
2543    tu_destroy_dynamic_rendering(device);
2544 
2545    ir3_compiler_destroy(device->compiler);
2546 
2547    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2548 
2549    if (device->perfcntrs_pass_cs) {
2550       free(device->perfcntrs_pass_cs_entries);
2551       tu_cs_finish(device->perfcntrs_pass_cs);
2552       free(device->perfcntrs_pass_cs);
2553    }
2554 
2555    if (device->dbg_cmdbuf_stomp_cs) {
2556       tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2557       free(device->dbg_cmdbuf_stomp_cs);
2558    }
2559 
2560    if (device->dbg_renderpass_stomp_cs) {
2561       tu_cs_finish(device->dbg_renderpass_stomp_cs);
2562       free(device->dbg_renderpass_stomp_cs);
2563    }
2564 
2565    if (device->cmdbuf_start_a725_quirk_entry) {
2566       free(device->cmdbuf_start_a725_quirk_entry);
2567       tu_cs_finish(device->cmdbuf_start_a725_quirk_cs);
2568       free(device->cmdbuf_start_a725_quirk_cs);
2569    }
2570 
2571    tu_autotune_fini(&device->autotune, device);
2572 
2573    tu_bo_suballocator_finish(&device->pipeline_suballoc);
2574    tu_bo_suballocator_finish(&device->autotune_suballoc);
2575    tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2576 
2577    tu_bo_finish(device, device->global_bo);
2578 
2579    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2580       for (unsigned q = 0; q < device->queue_count[i]; q++)
2581          tu_queue_finish(&device->queues[i][q]);
2582       if (device->queue_count[i])
2583          vk_free(&device->vk.alloc, device->queues[i]);
2584    }
2585 
2586    tu_drm_device_finish(device);
2587 
2588    if (device->physical_device->has_set_iova)
2589       util_vma_heap_finish(&device->vma);
2590 
2591    util_sparse_array_finish(&device->bo_map);
2592    u_rwlock_destroy(&device->dma_bo_lock);
2593 
2594    u_vector_finish(&device->zombie_vmas);
2595 
2596    pthread_cond_destroy(&device->timeline_cond);
2597    _mesa_hash_table_destroy(device->bo_sizes, NULL);
2598    vk_free(&device->vk.alloc, device->bo_list);
2599    vk_device_finish(&device->vk);
2600    vk_free(&device->vk.alloc, device);
2601 }
2602 
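/* Return a scratch BO at least `size` bytes large, from a per-device cache
 * of power-of-two-sized BOs. BOs are created on first use and live until
 * tu_DestroyDevice(); lookup is lock-free in the common case, with a
 * per-slot mutex plus a second initialized-check to serialize creation.
 * Sketch of a hypothetical caller:
 *
 *    struct tu_bo *bo;
 *    VkResult result = tu_get_scratch_bo(dev, required_size, &bo);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    // use bo->iova; the device owns the BO, so don't free it
 */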
VkResult
tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
{
   unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
   unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
   assert(index < ARRAY_SIZE(dev->scratch_bos));

   for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
      if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
         /* Fast path: just return the already-allocated BO. */
         *bo = dev->scratch_bos[i].bo;
         return VK_SUCCESS;
      }
   }

   /* Slow path: actually allocate the BO. Allocation is slow, so take a
    * per-slot lock to make sure only one thread performs it; other callers
    * block on the lock and then pick up the finished BO.
    */
   mtx_lock(&dev->scratch_bos[index].construct_mtx);

   /* Another thread may have allocated it already while we were waiting on
    * the lock. We need to check this in order to avoid double-allocating.
    */
   if (dev->scratch_bos[index].initialized) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      *bo = dev->scratch_bos[index].bo;
      return VK_SUCCESS;
   }

   unsigned bo_size = 1ull << size_log2;
   VkResult result = tu_bo_init_new(dev, &dev->scratch_bos[index].bo, bo_size,
                                    TU_BO_ALLOC_NO_FLAGS, "scratch");
   if (result != VK_SUCCESS) {
      mtx_unlock(&dev->scratch_bos[index].construct_mtx);
      return result;
   }

   p_atomic_set(&dev->scratch_bos[index].initialized, true);

   mtx_unlock(&dev->scratch_bos[index].construct_mtx);

   *bo = dev->scratch_bos[index].bo;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   *pPropertyCount = 0;
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                        uint32_t *pPropertyCount,
                                        VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &tu_instance_extensions_supported, pPropertyCount, pProperties);
}

VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   TU_FROM_HANDLE(tu_instance, instance, _instance);
   return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
                                    &tu_instance_entrypoints,
                                    pName);
}

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return tu_GetInstanceProcAddr(instance, pName);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_AllocateMemory(VkDevice _device,
                  const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkDeviceMemory *pMem)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   if (pAllocateInfo->allocationSize == 0) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   struct tu_memory_heap *mem_heap = &device->physical_device->heap;
   uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
   if (mem_heap_used > mem_heap->size)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   mem = (struct tu_device_memory *) vk_object_alloc(
      &device->vk, pAllocator, sizeof(*mem), VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (mem == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   const VkImportMemoryFdInfoKHR *fd_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
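   /* A chained VkImportMemoryFdInfoKHR with a handleType of 0 requests no
    * import at all, so treat it the same as an absent struct.
    */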
   if (fd_info && !fd_info->handleType)
      fd_info = NULL;

   if (fd_info) {
      assert(fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             fd_info->handleType ==
                VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      /*
       * TODO Importing the same fd twice gives us the same handle without
       * reference counting. We need to maintain a per-instance handle-to-bo
       * table and add reference counting to tu_bo.
       */
      result = tu_bo_init_dmabuf(device, &mem->bo,
                                 pAllocateInfo->allocationSize, fd_info->fd);
      if (result == VK_SUCCESS) {
         /* take ownership and close the fd */
         close(fd_info->fd);
      }
   } else {
      uint64_t client_address = 0;
      BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;

      const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
         vk_find_struct_const(pAllocateInfo->pNext,
                              MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
      if (replay_info && replay_info->opaqueCaptureAddress) {
         client_address = replay_info->opaqueCaptureAddress;
         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
      }

      const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
         pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
      if (flags_info &&
          (flags_info->flags &
           VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
         alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
      }

      char name[64] = "vkAllocateMemory()";
      if (device->bo_sizes)
         snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
                  (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
      VkMemoryPropertyFlags mem_property =
         device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
      result = tu_bo_init_new_explicit_iova(
         device, &mem->bo, pAllocateInfo->allocationSize, client_address,
         mem_property, alloc_flags, name);
   }

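   /* Heap accounting is an optimistic atomic reservation: charge the BO to
    * the heap first, then undo the charge and free the BO if that pushed
    * usage past the heap size.
    */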
   if (result == VK_SUCCESS) {
      mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
      if (mem_heap_used > mem_heap->size) {
         p_atomic_add(&mem_heap->used, -mem->bo->size);
         tu_bo_finish(device, mem->bo);
         result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                            "Out of heap memory");
      }
   }

   if (result != VK_SUCCESS) {
      vk_object_free(&device->vk, pAllocator, mem);
      return result;
   }

   /* Track in the device whether our BO list contains any implicit-sync BOs, so
    * we can suppress implicit sync on non-WSI usage.
    */
   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
   if (wsi_info && wsi_info->implicit_sync) {
      mtx_lock(&device->bo_mutex);
      if (!mem->bo->implicit_sync) {
         mem->bo->implicit_sync = true;
         device->implicit_sync_bo_count++;
      }
      mtx_unlock(&device->bo_mutex);
   }

   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   if (dedicate_info) {
      mem->image = tu_image_from_handle(dedicate_info->image);
   } else {
      mem->image = NULL;
   }

   *pMem = tu_device_memory_to_handle(mem);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_FreeMemory(VkDevice _device,
              VkDeviceMemory _mem,
              const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
   tu_bo_finish(device, mem->bo);
   vk_object_free(&device->vk, pAllocator, mem);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_MapMemory(VkDevice _device,
             VkDeviceMemory _memory,
             VkDeviceSize offset,
             VkDeviceSize size,
             VkMemoryMapFlags flags,
             void **ppData)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, mem, _memory);
   VkResult result;

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

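   /* BOs are mapped lazily on the first vkMapMemory() call and then stay
    * mapped until the BO is destroyed (see the TODO in tu_UnmapMemory()).
    */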
   if (!mem->bo->map) {
      result = tu_bo_map(device, mem->bo);
      if (result != VK_SUCCESS)
         return result;
   }

   *ppData = (char *) mem->bo->map + offset;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   /* TODO: unmap here instead of waiting for FreeMemory */
}

static void
tu_get_buffer_memory_requirements(struct tu_device *dev, uint64_t size,
                                  VkMemoryRequirements2 *pMemoryRequirements)
{
   pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
      .size = MAX2(align64(size, 64), size),
      .alignment = 64,
      .memoryTypeBits = (1 << dev->physical_device->memory.type_count) - 1,
   };

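   /* Buffers never require (or prefer) a dedicated allocation here, so
    * report both dedicated-requirements fields as false.
    */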
   vk_foreach_struct(ext, pMemoryRequirements->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
         VkMemoryDedicatedRequirements *req =
            (VkMemoryDedicatedRequirements *) ext;
         req->requiresDedicatedAllocation = false;
         req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
         break;
      }
      default:
         break;
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
tu_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

   tu_get_buffer_memory_requirements(device, buffer->vk.size, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceBufferMemoryRequirements(
   VkDevice _device,
   const VkDeviceBufferMemoryRequirements *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   tu_get_buffer_memory_requirements(device, pInfo->pCreateInfo->size, pMemoryRequirements);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetDeviceMemoryCommitment(VkDevice device,
                             VkDeviceMemory memory,
                             VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_BindBufferMemory2(VkDevice device,
                     uint32_t bindInfoCount,
                     const VkBindBufferMemoryInfo *pBindInfos)
{
   TU_FROM_HANDLE(tu_device, dev, device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);
      TU_FROM_HANDLE(tu_buffer, buffer, pBindInfos[i].buffer);

      if (mem) {
         buffer->bo = mem->bo;
         buffer->iova = mem->bo->iova + pBindInfos[i].memoryOffset;
         if (buffer->vk.usage &
             (VK_BUFFER_USAGE_2_SAMPLER_DESCRIPTOR_BUFFER_BIT_EXT |
              VK_BUFFER_USAGE_2_RESOURCE_DESCRIPTOR_BUFFER_BIT_EXT))
            tu_bo_allow_dump(dev, mem->bo);
      } else {
         buffer->bo = NULL;
      }
   }
   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_BindImageMemory2(VkDevice _device,
                    uint32_t bindInfoCount,
                    const VkBindImageMemoryInfo *pBindInfos)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   for (uint32_t i = 0; i < bindInfoCount; ++i) {
      TU_FROM_HANDLE(tu_image, image, pBindInfos[i].image);
      TU_FROM_HANDLE(tu_device_memory, mem, pBindInfos[i].memory);

      if (mem) {
         image->bo = mem->bo;
         image->iova = mem->bo->iova + pBindInfos[i].memoryOffset;

         if (image->vk.usage & VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT) {
            if (!mem->bo->map) {
               VkResult result = tu_bo_map(device, mem->bo);
               if (result != VK_SUCCESS)
                  return result;
            }

            image->map = (char *)mem->bo->map + pBindInfos[i].memoryOffset;
         } else {
            image->map = NULL;
         }
      } else {
         image->bo = NULL;
         image->map = NULL;
         image->iova = 0;
      }
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_QueueBindSparse(VkQueue _queue,
                   uint32_t bindInfoCount,
                   const VkBindSparseInfo *pBindInfo,
                   VkFence _fence)
{
   return VK_SUCCESS;
}

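/* VkEvent is implemented as a dedicated, host-mapped BO whose first 64-bit
 * word holds the event state (1 = set, 0 = reset). The host reads and writes
 * the mapped word directly, and GPU-side packets can access the same storage
 * through the BO's iova.
 */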
VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateEvent(VkDevice _device,
               const VkEventCreateInfo *pCreateInfo,
               const VkAllocationCallbacks *pAllocator,
               VkEvent *pEvent)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   struct tu_event *event = (struct tu_event *)
         vk_object_alloc(&device->vk, pAllocator, sizeof(*event),
                         VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   VkResult result = tu_bo_init_new(device, &event->bo, 0x1000,
                                    TU_BO_ALLOC_NO_FLAGS, "event");
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = tu_bo_map(device, event->bo);
   if (result != VK_SUCCESS)
      goto fail_map;

   *pEvent = tu_event_to_handle(event);

   return VK_SUCCESS;

fail_map:
   tu_bo_finish(device, event->bo);
fail_alloc:
   vk_object_free(&device->vk, pAllocator, event);
   return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyEvent(VkDevice _device,
                VkEvent _event,
                const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_event, event, _event);

   if (!event)
      return;

   tu_bo_finish(device, event->bo);
   vk_object_free(&device->vk, pAllocator, event);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetEventStatus(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_event, event, _event);

   if (vk_device_is_lost(&device->vk))
      return VK_ERROR_DEVICE_LOST;

   if (*(uint64_t*) event->bo->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_SetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 1;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_ResetEvent(VkDevice _device, VkEvent _event)
{
   TU_FROM_HANDLE(tu_event, event, _event);
   *(uint64_t*) event->bo->map = 0;

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateBuffer(VkDevice _device,
                const VkBufferCreateInfo *pCreateInfo,
                const VkAllocationCallbacks *pAllocator,
                VkBuffer *pBuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_buffer *buffer;

   buffer = (struct tu_buffer *) vk_buffer_create(
      &device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (buffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   *pBuffer = tu_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyBuffer(VkDevice _device,
                 VkBuffer _buffer,
                 const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateFramebuffer(VkDevice _device,
                     const VkFramebufferCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkFramebuffer *pFramebuffer)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC))
      return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
                                         pFramebuffer);

   TU_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
   struct tu_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;

   size_t size = sizeof(*framebuffer);
   if (!imageless)
      size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
   framebuffer = (struct tu_framebuffer *) vk_object_alloc(
      &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
   if (framebuffer == NULL)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   if (!imageless) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct tu_image_view *iview = tu_image_view_from_handle(_iview);
         framebuffer->attachments[i].attachment = iview;
      }
   }

   tu_framebuffer_tiling_config(framebuffer, device, pass);

   *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *pRenderingInfo)
{
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;

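   /* The render area may not start at the origin, so size the dynamic
    * framebuffer out to the far edge (offset + extent) of the render area.
    */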
   framebuffer->attachment_count = pass->attachment_count;
   framebuffer->width = pRenderingInfo->renderArea.offset.x +
      pRenderingInfo->renderArea.extent.width;
   framebuffer->height = pRenderingInfo->renderArea.offset.y +
      pRenderingInfo->renderArea.extent.height;
   framebuffer->layers = pRenderingInfo->layerCount;

   tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyFramebuffer(VkDevice _device,
                      VkFramebuffer _fb,
                      const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);

   if (TU_DEBUG(DYNAMIC)) {
      vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
      return;
   }

   TU_FROM_HANDLE(tu_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_object_free(&device->vk, pAllocator, fb);
}

static void
tu_init_sampler(struct tu_device *device,
                struct tu_sampler *sampler,
                const VkSamplerCreateInfo *pCreateInfo)
{
   const struct VkSamplerReductionModeCreateInfo *reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
   const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
   /* for non-custom border colors, the VK enum is translated directly to an offset in
    * the border color buffer. custom border colors are located immediately after the
    * builtin colors, and thus an offset of TU_BORDER_COLOR_BUILTIN is added.
    */
   uint32_t border_color = (unsigned) pCreateInfo->borderColor;
   if (pCreateInfo->borderColor == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
       pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
      mtx_lock(&device->mutex);
      border_color = BITSET_FFS(device->custom_border_color) - 1;
      assert(border_color < TU_BORDER_COLOR_COUNT);
      BITSET_CLEAR(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);

      VkClearColorValue color = custom_border_color->customBorderColor;
      if (custom_border_color->format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT &&
          device->use_z24uint_s8uint) {
         /* When sampling stencil using the special Z24UINT_S8UINT format, the
          * border color is in the second component. Note: if
          * customBorderColorWithoutFormat is enabled, we may miss doing this
          * here if the format isn't specified, which is why we don't use that
          * format.
          */
         color.uint32[1] = color.uint32[0];
      }

      tu6_pack_border_color(
         &device->global_bo_map->bcolor[border_color], &color,
         pCreateInfo->borderColor == VK_BORDER_COLOR_INT_CUSTOM_EXT);
      border_color += TU_BORDER_COLOR_BUILTIN;
   }

   unsigned aniso = pCreateInfo->anisotropyEnable ?
      util_last_bit(MIN2((uint32_t)pCreateInfo->maxAnisotropy >> 1, 8)) : 0;
   bool miplinear = (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR);
   float min_lod = CLAMP(pCreateInfo->minLod, 0.0f, 4095.0f / 256.0f);
   float max_lod = CLAMP(pCreateInfo->maxLod, 0.0f, 4095.0f / 256.0f);

   sampler->descriptor[0] =
      COND(miplinear, A6XX_TEX_SAMP_0_MIPFILTER_LINEAR_NEAR) |
      A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(pCreateInfo->magFilter, aniso)) |
      A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(pCreateInfo->minFilter, aniso)) |
      A6XX_TEX_SAMP_0_ANISO((enum a6xx_tex_aniso) aniso) |
      A6XX_TEX_SAMP_0_WRAP_S(tu6_tex_wrap(pCreateInfo->addressModeU)) |
      A6XX_TEX_SAMP_0_WRAP_T(tu6_tex_wrap(pCreateInfo->addressModeV)) |
      A6XX_TEX_SAMP_0_WRAP_R(tu6_tex_wrap(pCreateInfo->addressModeW)) |
      A6XX_TEX_SAMP_0_LOD_BIAS(pCreateInfo->mipLodBias);
   sampler->descriptor[1] =
      COND(pCreateInfo->flags & VK_SAMPLER_CREATE_NON_SEAMLESS_CUBE_MAP_BIT_EXT,
           A6XX_TEX_SAMP_1_CUBEMAPSEAMLESSFILTOFF) |
      COND(pCreateInfo->unnormalizedCoordinates, A6XX_TEX_SAMP_1_UNNORM_COORDS) |
      A6XX_TEX_SAMP_1_MIN_LOD(min_lod) |
      A6XX_TEX_SAMP_1_MAX_LOD(max_lod) |
      COND(pCreateInfo->compareEnable,
           A6XX_TEX_SAMP_1_COMPARE_FUNC(tu6_compare_func(pCreateInfo->compareOp)));
   sampler->descriptor[2] = A6XX_TEX_SAMP_2_BCOLOR(border_color);
   sampler->descriptor[3] = 0;

   if (reduction) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_REDUCTION_MODE(
         tu6_reduction_mode(reduction->reductionMode));
   }

   sampler->ycbcr_sampler = ycbcr_conversion ?
      tu_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion) : NULL;

   if (sampler->ycbcr_sampler &&
       sampler->ycbcr_sampler->chroma_filter == VK_FILTER_LINEAR) {
      sampler->descriptor[2] |= A6XX_TEX_SAMP_2_CHROMA_LINEAR;
   }

   /* TODO:
    * A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR disables mipmapping, but vk has no NONE mipfilter?
    */
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateSampler(VkDevice _device,
                 const VkSamplerCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator,
                 VkSampler *pSampler)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_sampler *sampler;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = (struct tu_sampler *) vk_object_alloc(
      &device->vk, pAllocator, sizeof(*sampler), VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   tu_init_sampler(device, sampler, pCreateInfo);
   *pSampler = tu_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroySampler(VkDevice _device,
                  VkSampler _sampler,
                  const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_sampler, sampler, _sampler);
   uint32_t border_color;

   if (!sampler)
      return;

   border_color = (sampler->descriptor[2] & A6XX_TEX_SAMP_2_BCOLOR__MASK) >> A6XX_TEX_SAMP_2_BCOLOR__SHIFT;
   if (border_color >= TU_BORDER_COLOR_BUILTIN) {
      border_color -= TU_BORDER_COLOR_BUILTIN;
      /* if the sampler had a custom border color, free it. TODO: no lock */
      mtx_lock(&device->mutex);
      assert(!BITSET_TEST(device->custom_border_color, border_color));
      BITSET_SET(device->custom_border_color, border_color);
      mtx_unlock(&device->mutex);
   }

   vk_object_free(&device->vk, pAllocator, sampler);
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdKHR(VkDevice _device,
                  const VkMemoryGetFdInfoKHR *pGetFdInfo,
                  int *pFd)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType ==
             VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
   if (prime_fd < 0)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   *pFd = prime_fd;

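   /* For dedicated image allocations, publish the image layout as a DRM
    * format modifier in the BO's metadata so that importers of this dmabuf
    * can recover the tiling/UBWC layout.
    */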
   if (memory->image) {
      struct fdl_layout *l = &memory->image->layout[0];
      uint64_t modifier;
      if (l->ubwc) {
         modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
      } else if (l->tile_mode == 2) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED2;
      } else if (l->tile_mode == 3) {
         modifier = DRM_FORMAT_MOD_QCOM_TILED3;
      } else {
         assert(!l->tile_mode);
         modifier = DRM_FORMAT_MOD_LINEAR;
      }
      struct fdl_metadata metadata = {
         .modifier = modifier,
      };
      tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
   }

   return VK_SUCCESS;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetMemoryFdPropertiesKHR(VkDevice _device,
                            VkExternalMemoryHandleTypeFlagBits handleType,
                            int fd,
                            VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
   pMemoryFdProperties->memoryTypeBits =
      (1 << device->physical_device->memory.type_count) - 1;
   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMultisamplePropertiesEXT(
   VkPhysicalDevice                            physicalDevice,
   VkSampleCountFlagBits                       samples,
   VkMultisamplePropertiesEXT*                 pMultisampleProperties)
{
   TU_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
   else
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
}

VkDeviceAddress
tu_GetBufferDeviceAddress(VkDevice _device,
                          const VkBufferDeviceAddressInfo* pInfo)
{
   TU_FROM_HANDLE(tu_buffer, buffer, pInfo->buffer);

   return buffer->iova;
}

uint64_t tu_GetBufferOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkBufferDeviceAddressInfo*            pInfo)
{
   /* We care only about memory allocation opaque addresses */
   return 0;
}

uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
    VkDevice                                    device,
    const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
{
   TU_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
   return mem->bo->iova;
}

struct tu_debug_bos_entry {
   uint32_t count;
   uint64_t size;
   const char *name;
};

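/* Debug BO accounting (enabled when dev->bo_sizes exists): a hash table
 * keyed by BO name tracks the live count and total page-aligned size per
 * name. tu_debug_bos_add()/tu_debug_bos_del() update the table as BOs are
 * created and destroyed, and tu_debug_bos_print_stats() logs the table
 * sorted by allocation count.
 */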
const char *
tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
{
   assert(name);

   if (likely(!dev->bo_sizes))
      return NULL;

   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
   struct tu_debug_bos_entry *debug_bos;

   if (!entry) {
      debug_bos = (struct tu_debug_bos_entry *) calloc(
         1, sizeof(struct tu_debug_bos_entry));
      debug_bos->name = strdup(name);
      _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
   } else {
      debug_bos = (struct tu_debug_bos_entry *) entry->data;
   }

   debug_bos->count++;
   debug_bos->size += align(size, 4096);
   mtx_unlock(&dev->bo_mutex);

   return debug_bos->name;
}

void
tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
{
   if (likely(!dev->bo_sizes) || !bo->name)
      return;

   mtx_lock(&dev->bo_mutex);
   struct hash_entry *entry =
      _mesa_hash_table_search(dev->bo_sizes, bo->name);
   /* If we're finishing the BO, it should have been added already */
   assert(entry);

   struct tu_debug_bos_entry *debug_bos =
      (struct tu_debug_bos_entry *) entry->data;
   debug_bos->count--;
   debug_bos->size -= align(bo->size, 4096);
   if (!debug_bos->count) {
      _mesa_hash_table_remove(dev->bo_sizes, entry);
      free((void *) debug_bos->name);
      free(debug_bos);
   }
   mtx_unlock(&dev->bo_mutex);
}

static int debug_bos_count_compare(const void *in_a, const void *in_b)
{
   struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
   struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
   return a->count - b->count;
}

void
tu_debug_bos_print_stats(struct tu_device *dev)
{
   if (likely(!dev->bo_sizes))
      return;

   mtx_lock(&dev->bo_mutex);

   /* Put the HT's sizes data in an array so we can sort by number of allocations. */
   struct util_dynarray dyn;
   util_dynarray_init(&dyn, NULL);

   uint32_t size = 0;
   uint32_t count = 0;
   hash_table_foreach(dev->bo_sizes, entry)
   {
      struct tu_debug_bos_entry *debug_bos =
         (struct tu_debug_bos_entry *) entry->data;
      util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
      size += debug_bos->size / 1024;
      count += debug_bos->count;
   }

   qsort(dyn.data,
         util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
         sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);

   util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
   {
      struct tu_debug_bos_entry *debug_bos = *entryp;
      mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
                (long long) (debug_bos->size / 1024));
   }

   mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));

   util_dynarray_fini(&dyn);

   mtx_unlock(&dev->bo_mutex);
}

void
tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
                              const VkDebugUtilsLabelEXT *pLabelInfo)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);

   /* Note that the spec says:
    *
    * "An application may open a debug label region in one command buffer and
    *  close it in another, or otherwise split debug label regions across
    *  multiple command buffers or multiple queue submissions. When viewed
    *  from the linear series of submissions to a single queue, the calls to
    *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
    *  matched and balanced."
    *
    * But if you're beginning labeling during a renderpass and ending outside
    * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
    * expect that u_trace and perfetto will do something like take just one of
    * the begins/ends, or drop the event entirely, but not crash.  Similarly,
    * I think we'll have problems if the tracepoints are split across cmd
    * buffers. Still, getting the simple case of cmd buffer annotation into
    * perfetto should prove useful.
    */
   const char *label = pLabelInfo->pLabelName;
   if (cmd_buffer->state.pass) {
      trace_start_cmd_buffer_annotation_rp(
         &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
   } else {
      trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
                                        strlen(label), label);
   }
}

void
tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
{
   VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);

   if (cmd_buffer->vk.labels.size > 0) {
      if (cmd_buffer->state.pass) {
         trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
                                            &cmd_buffer->draw_cs);
      } else {
         trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
      }
   }

   vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
}