/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * SPDX-License-Identifier: MIT
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 */

#include "tu_device.h"

#include "drm-uapi/drm_fourcc.h"
#include "fdl/freedreno_layout.h"
#include <fcntl.h>
#include <poll.h>

#include "git_sha1.h"
#include "util/u_debug.h"
#include "util/disk_cache.h"
#include "util/hex.h"
#include "util/driconf.h"
#include "util/os_misc.h"
#include "util/u_process.h"
#include "vk_android.h"
#include "vk_shader_module.h"
#include "vk_sampler.h"
#include "vk_util.h"

/* for fd_get_driver/device_uuid() */
#include "freedreno/common/freedreno_uuid.h"
#include "freedreno/common/freedreno_stompable_regs.h"

#include "tu_acceleration_structure.h"
#include "tu_clear_blit.h"
#include "tu_cmd_buffer.h"
#include "tu_cs.h"
#include "tu_descriptor_set.h"
#include "tu_dynamic_rendering.h"
#include "tu_image.h"
#include "tu_pass.h"
#include "tu_queue.h"
#include "tu_query_pool.h"
#include "tu_rmv.h"
#include "tu_tracepoints.h"
#include "tu_wsi.h"

#if DETECT_OS_ANDROID
#include "util/u_gralloc/u_gralloc.h"
#include <vndk/hardware_buffer.h>
#endif

uint64_t os_page_size = 4096;

static int
tu_device_get_cache_uuid(struct tu_physical_device *device, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   /* Note: IR3_SHADER_DEBUG also affects compilation, but it's not
    * initialized until after compiler creation so we have to add it to the
    * shader hash instead, since the compiler is only created with the logical
    * device.
    */
   uint64_t driver_flags = TU_DEBUG(NOMULTIPOS);
   uint16_t family = fd_dev_gpu_id(&device->dev_id);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier((void *)tu_device_get_cache_uuid, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &driver_flags, sizeof(driver_flags));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}
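
/*
 * Illustrative sketch, not driver code: the cache UUID is the first
 * VK_UUID_SIZE (16) bytes of a SHA-1 over three inputs, so changing any
 * of them invalidates existing on-disk pipeline caches:
 *
 *    SHA-1( build identifier of this function  // changes with every driver build
 *         + GPU family as uint16_t             // e.g. 630 for an a630
 *         + driver debug flags as uint64_t )   // currently just TU_DEBUG(NOMULTIPOS)
 */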

#define TU_API_VERSION VK_MAKE_VERSION(1, 4, VK_HEADER_VERSION)

VKAPI_ATTR VkResult VKAPI_CALL
tu_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
    *pApiVersion = TU_API_VERSION;
    return VK_SUCCESS;
}
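
/*
 * A minimal usage sketch (standard Vulkan decoding macros; the values
 * follow from TU_API_VERSION above):
 *
 *    uint32_t ver;
 *    tu_EnumerateInstanceVersion(&ver);
 *    assert(VK_API_VERSION_MAJOR(ver) == 1);
 *    assert(VK_API_VERSION_MINOR(ver) == 4);
 *    // VK_API_VERSION_PATCH(ver) == the VK_HEADER_VERSION we built against
 */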

static const struct vk_instance_extension_table tu_instance_extensions_supported = { .table = {
   .KHR_device_group_creation           = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display                         = true,
#endif
   .KHR_external_fence_capabilities     = true,
   .KHR_external_memory_capabilities    = true,
   .KHR_external_semaphore_capabilities = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_get_display_properties2         = true,
#endif
   .KHR_get_physical_device_properties2 = true,
#ifdef TU_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2       = true,
   .KHR_surface                         = true,
   .KHR_surface_protected_capabilities  = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface                 = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface                     = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface                    = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_acquire_drm_display             = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display            = true,
#endif
   .EXT_debug_report                    = true,
   .EXT_debug_utils                     = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .EXT_direct_mode_display             = true,
   .EXT_display_surface_counter         = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface                = true,
#endif
#ifdef TU_USE_WSI_PLATFORM
   .EXT_surface_maintenance1            = true,
   .EXT_swapchain_colorspace            = true,
#endif
} };

static bool
is_kgsl(struct tu_instance *instance)
{
   return strcmp(instance->knl->name, "kgsl") == 0;
}

static void
get_device_extensions(const struct tu_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   /* device->has_raytracing contains the value of the SW fuse. If the
    * device doesn't have a fuse (i.e. a740), we have to ignore it because
    * kgsl returns false. If it does have a fuse, enable raytracing if the
    * fuse is set and we have ray_intersection.
    */
   bool has_raytracing =
      device->info->a7xx.has_ray_intersection &&
      (!device->info->a7xx.has_sw_fuse || device->has_raytracing);

   *ext = (struct vk_device_extension_table) { .table = {
      .KHR_8bit_storage = device->info->a7xx.storage_8bit,
      .KHR_16bit_storage = device->info->a6xx.storage_16bit,
      .KHR_acceleration_structure = has_raytracing,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .KHR_compute_shader_derivatives = device->info->chip >= 7,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_dynamic_rendering = true,
      .KHR_dynamic_rendering_local_read = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_fragment_shading_rate = device->info->a6xx.has_attachment_shading_rate,
      .KHR_get_memory_requirements2 = true,
      .KHR_global_priority = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_maintenance6 = true,
      .KHR_map_memory2 = true,
      .KHR_multiview = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .KHR_performance_query = TU_DEBUG(PERFC),
      .KHR_pipeline_executable_properties = true,
      .KHR_pipeline_library = true,
#ifdef TU_USE_WSI_PLATFORM
      /* Hide these behind dri configs for now, since we cannot implement
       * them reliably on all surfaces yet. There is no surface capability
       * query for present wait/id, but the feature is useful enough to hide
       * behind an opt-in mechanism for now. If the instance only enables
       * surface extensions that unconditionally support present wait, we
       * can also expose the extension that way. */
      .KHR_present_id = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                         wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
      .KHR_present_wait = (driQueryOptionb(&device->instance->dri_options, "vk_khr_present_wait") ||
                           wsi_common_vk_instance_supports_present_wait(&device->instance->vk)),
#endif
      .KHR_push_descriptor = true,
      .KHR_ray_query = has_raytracing,
      .KHR_ray_tracing_maintenance1 = has_raytracing,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = device->info->a7xx.has_64b_ssbo_atomics,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_float_controls2 = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_rotate = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef TU_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_synchronization2 = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,

      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_dynamic_state = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_calibrated_timestamps = device->info->a7xx.has_persistent_counter,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->info->chip >= 7,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = true,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = true,
      .EXT_descriptor_buffer = true,
      .EXT_descriptor_indexing = true,
      .EXT_device_address_binding_report = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_extended_dynamic_state3 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .EXT_fragment_density_map = true,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_graphics_pipeline_library = true,
      .EXT_host_image_copy = true,
      .EXT_host_query_reset = true,
      .EXT_image_2d_view_of_3d = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_image_view_min_lod = true,
      .EXT_index_type_uint8 = true,
      .EXT_inline_uniform_block = true,
      .EXT_legacy_dithering = true,
      .EXT_legacy_vertex_attributes = true,
      .EXT_line_rasterization = true,
      .EXT_load_store_op_none = true,
      .EXT_map_memory_placed = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_mutable_descriptor_type = true,
      .EXT_nested_command_buffer = true,
      .EXT_non_seamless_cube_map = true,
      .EXT_physical_device_drm = !is_kgsl(device->instance),
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_primitives_generated_query = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_rasterization_order_attachment_access = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->info->a6xx.has_sample_locations,
      .EXT_sampler_filter_minmax = device->info->a6xx.has_sampler_minmax,
      .EXT_scalar_block_layout = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_shader_replicated_composites = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_viewport_index_layer = TU_DEBUG(NOCONFORM) ? true : device->info->a6xx.has_hw_multiview,
      .EXT_subgroup_size_control = true,
#ifdef TU_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_vertex_input_dynamic_state = true,

      /* For Graphics Flight Recorder (GFR) */
      .AMD_buffer_marker = true,
      .ARM_rasterization_order_attachment_access = true,
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .IMG_filter_cubic = device->info->a6xx.has_tex_filter_cubic,
      .NV_compute_shader_derivatives = device->info->chip >= 7,
      .VALVE_mutable_descriptor_type = true,
   } };

#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}
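
/*
 * Worked truth table for the raytracing gate in get_device_extensions()
 * above (illustrative):
 *
 *    has_ray_intersection  has_sw_fuse  SW fuse value   ->  exposed
 *    false                 -            -                   false
 *    true                  false        ignored (a740)      true
 *    true                  true         set                 true
 *    true                  true         unset               false
 */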

static void
tu_get_features(struct tu_physical_device *pdevice,
                struct vk_features *features)
{
   *features = (struct vk_features) { false };

   /* Vulkan 1.0 */
   features->robustBufferAccess = true;
   features->fullDrawIndexUint32 = true;
   features->imageCubeArray = true;
   features->independentBlend = true;
   features->geometryShader = true;
   features->tessellationShader = true;
   features->sampleRateShading = true;
   features->dualSrcBlend = true;
   features->logicOp = true;
   features->multiDrawIndirect = true;
   features->drawIndirectFirstInstance = true;
   features->depthClamp = true;
   features->depthBiasClamp = true;
   features->fillModeNonSolid = true;
   features->depthBounds = true;
   features->wideLines = pdevice->info->a6xx.line_width_max > 1.0;
   features->largePoints = true;
   features->alphaToOne = true;
   features->multiViewport = true;
   features->samplerAnisotropy = true;
   features->textureCompressionETC2 = true;
   features->textureCompressionASTC_LDR = true;
   features->textureCompressionBC = true;
   features->occlusionQueryPrecise = true;
   features->pipelineStatisticsQuery = true;
   features->vertexPipelineStoresAndAtomics = true;
   features->fragmentStoresAndAtomics = true;
   features->shaderTessellationAndGeometryPointSize = true;
   features->shaderImageGatherExtended = true;
   features->shaderStorageImageExtendedFormats = true;
   features->shaderStorageImageMultisample = false;
   features->shaderStorageImageReadWithoutFormat = true;
   features->shaderStorageImageWriteWithoutFormat = true;
   features->shaderUniformBufferArrayDynamicIndexing = true;
   features->shaderSampledImageArrayDynamicIndexing = true;
   features->shaderStorageBufferArrayDynamicIndexing = true;
   features->shaderStorageImageArrayDynamicIndexing = true;
   features->shaderClipDistance = true;
   features->shaderCullDistance = true;
   features->shaderFloat64 = false;
   features->shaderInt64 = true;
   features->shaderInt16 = true;
   features->sparseBinding = false;
   features->variableMultisampleRate = true;
   features->inheritedQueries = true;

   /* Vulkan 1.1 */
   features->storageBuffer16BitAccess            = pdevice->info->a6xx.storage_16bit;
   features->uniformAndStorageBuffer16BitAccess  = false;
   features->storagePushConstant16               = false;
   features->storageInputOutput16                = false;
   features->multiview                           = true;
   features->multiviewGeometryShader             = false;
   features->multiviewTessellationShader         = false;
   features->variablePointersStorageBuffer       = true;
   features->variablePointers                    = true;
   features->protectedMemory                     = false;
   features->samplerYcbcrConversion              = true;
   features->shaderDrawParameters                = true;

   /* Vulkan 1.2 */
   features->samplerMirrorClampToEdge            = true;
   features->drawIndirectCount                   = true;
   features->storageBuffer8BitAccess             = pdevice->info->a7xx.storage_8bit;
   features->uniformAndStorageBuffer8BitAccess   = false;
   features->storagePushConstant8                = false;
   features->shaderBufferInt64Atomics =
      pdevice->info->a7xx.has_64b_ssbo_atomics;
   features->shaderSharedInt64Atomics            = false;
   features->shaderFloat16                       = true;
   features->shaderInt8                          = true;

   features->descriptorIndexing                                 = true;
   features->shaderInputAttachmentArrayDynamicIndexing          = false;
   features->shaderUniformTexelBufferArrayDynamicIndexing       = true;
   features->shaderStorageTexelBufferArrayDynamicIndexing       = true;
   features->shaderUniformBufferArrayNonUniformIndexing         = true;
   features->shaderSampledImageArrayNonUniformIndexing          = true;
   features->shaderStorageBufferArrayNonUniformIndexing         = true;
   features->shaderStorageImageArrayNonUniformIndexing          = true;
   features->shaderInputAttachmentArrayNonUniformIndexing       = false;
   features->shaderUniformTexelBufferArrayNonUniformIndexing    = true;
   features->shaderStorageTexelBufferArrayNonUniformIndexing    = true;
   features->descriptorBindingUniformBufferUpdateAfterBind      = true;
   features->descriptorBindingSampledImageUpdateAfterBind       = true;
   features->descriptorBindingStorageImageUpdateAfterBind       = true;
   features->descriptorBindingStorageBufferUpdateAfterBind      = true;
   features->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   features->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   features->descriptorBindingUpdateUnusedWhilePending          = true;
   features->descriptorBindingPartiallyBound                    = true;
   features->descriptorBindingVariableDescriptorCount           = true;
   features->runtimeDescriptorArray                             = true;

   features->samplerFilterMinmax                 =
      pdevice->info->a6xx.has_sampler_minmax;
   features->scalarBlockLayout                   = true;
   features->imagelessFramebuffer                = true;
   features->uniformBufferStandardLayout         = true;
   features->shaderSubgroupExtendedTypes         = true;
   features->separateDepthStencilLayouts         = true;
   features->hostQueryReset                      = true;
   features->timelineSemaphore                   = true;
   features->bufferDeviceAddress                 = true;
   features->bufferDeviceAddressCaptureReplay    = pdevice->has_set_iova;
   features->bufferDeviceAddressMultiDevice      = false;
   features->vulkanMemoryModel                   = true;
   features->vulkanMemoryModelDeviceScope        = true;
   features->vulkanMemoryModelAvailabilityVisibilityChains = true;
   features->shaderOutputViewportIndex           = true;
   features->shaderOutputLayer                   = true;
   features->subgroupBroadcastDynamicId          = true;

   /* Vulkan 1.3 */
   features->robustImageAccess                   = true;
   features->inlineUniformBlock                  = true;
   features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
   features->pipelineCreationCacheControl        = true;
   features->privateData                         = true;
   features->shaderDemoteToHelperInvocation      = true;
   features->shaderTerminateInvocation           = true;
   features->subgroupSizeControl                 = true;
   features->computeFullSubgroups                = true;
   features->synchronization2                    = true;
   features->textureCompressionASTC_HDR          = false;
   features->shaderZeroInitializeWorkgroupMemory = true;
   features->dynamicRendering                    = true;
   features->shaderIntegerDotProduct             = true;
   features->maintenance4                        = true;

   /* Vulkan 1.4 */
   features->pushDescriptor = true;

   /* VK_KHR_acceleration_structure */
   features->accelerationStructure = true;
   features->accelerationStructureCaptureReplay = pdevice->has_set_iova;
   features->descriptorBindingAccelerationStructureUpdateAfterBind = true;

   /* VK_KHR_compute_shader_derivatives */
   features->computeDerivativeGroupQuads = pdevice->info->chip >= 7;
   features->computeDerivativeGroupLinear = pdevice->info->chip >= 7;

   /* VK_KHR_dynamic_rendering_local_read */
   features->dynamicRenderingLocalRead = true;

   /* VK_KHR_fragment_shading_rate */
   features->pipelineFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;
   features->primitiveFragmentShadingRate = pdevice->info->a7xx.has_primitive_shading_rate;
   features->attachmentFragmentShadingRate = pdevice->info->a6xx.has_attachment_shading_rate;

   /* VK_KHR_index_type_uint8 */
   features->indexTypeUint8 = true;

   /* VK_KHR_line_rasterization */
   features->rectangularLines = true;
   features->bresenhamLines = true;
   features->smoothLines = false;
   features->stippledRectangularLines = false;
   features->stippledBresenhamLines = false;
   features->stippledSmoothLines = false;

   /* VK_KHR_maintenance5 */
   features->maintenance5 = true;

   /* VK_KHR_maintenance6 */
   features->maintenance6 = true;

   /* VK_KHR_performance_query */
   features->performanceCounterQueryPools = true;
   features->performanceCounterMultipleQueryPools = false;

   /* VK_KHR_pipeline_executable_properties */
   features->pipelineExecutableInfo = true;

   /* VK_KHR_present_id */
   features->presentId = pdevice->vk.supported_extensions.KHR_present_id;

   /* VK_KHR_present_wait */
   features->presentWait = pdevice->vk.supported_extensions.KHR_present_wait;

   /* VK_KHR_shader_expect_assume */
   features->shaderExpectAssume = true;

   /* VK_KHR_shader_float_controls2 */
   features->shaderFloatControls2 = true;

   /* VK_KHR_shader_subgroup_uniform_control_flow */
   features->shaderSubgroupUniformControlFlow = true;

   /* VK_KHR_vertex_attribute_divisor */
   features->vertexAttributeInstanceRateDivisor = true;
   features->vertexAttributeInstanceRateZeroDivisor = true;

   /* VK_KHR_workgroup_memory_explicit_layout */
   features->workgroupMemoryExplicitLayout = true;
   features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
   features->workgroupMemoryExplicitLayout8BitAccess = true;
   features->workgroupMemoryExplicitLayout16BitAccess = true;

   /* VK_EXT_4444_formats */
   features->formatA4R4G4B4 = true;
   features->formatA4B4G4R4 = true;

   /* VK_EXT_attachment_feedback_loop_dynamic_state */
   features->attachmentFeedbackLoopDynamicState = true;

   /* VK_EXT_attachment_feedback_loop_layout */
   features->attachmentFeedbackLoopLayout = true;

   /* VK_EXT_border_color_swizzle */
   features->borderColorSwizzle = true;
   features->borderColorSwizzleFromImage = true;

   /* VK_EXT_color_write_enable */
   features->colorWriteEnable = true;

   /* VK_EXT_conditional_rendering */
   features->conditionalRendering = true;
   features->inheritedConditionalRendering = true;

   /* VK_EXT_custom_border_color */
   features->customBorderColors = true;
   features->customBorderColorWithoutFormat = true;

   /* VK_EXT_depth_clamp_zero_one */
   features->depthClampZeroOne = true;

   /* VK_EXT_depth_clip_control */
   features->depthClipControl = true;

   /* VK_EXT_depth_clip_enable */
   features->depthClipEnable = true;

   /* VK_EXT_descriptor_buffer */
   features->descriptorBuffer = true;
   features->descriptorBufferCaptureReplay = pdevice->has_set_iova;
   features->descriptorBufferImageLayoutIgnored = true;
   features->descriptorBufferPushDescriptors = true;

   /* VK_EXT_device_address_binding_report */
   features->reportAddressBinding = true;

   /* VK_EXT_extended_dynamic_state */
   features->extendedDynamicState = true;

   /* VK_EXT_extended_dynamic_state2 */
   features->extendedDynamicState2 = true;
   features->extendedDynamicState2LogicOp = true;
   features->extendedDynamicState2PatchControlPoints = true;

   /* VK_EXT_extended_dynamic_state3 */
   features->extendedDynamicState3PolygonMode = true;
   features->extendedDynamicState3TessellationDomainOrigin = true;
   features->extendedDynamicState3DepthClampEnable = true;
   features->extendedDynamicState3DepthClipEnable = true;
   features->extendedDynamicState3LogicOpEnable = true;
   features->extendedDynamicState3SampleMask = true;
   features->extendedDynamicState3RasterizationSamples = true;
   features->extendedDynamicState3AlphaToCoverageEnable = true;
   features->extendedDynamicState3AlphaToOneEnable = true;
   features->extendedDynamicState3DepthClipNegativeOneToOne = true;
   features->extendedDynamicState3RasterizationStream = true;
   features->extendedDynamicState3ConservativeRasterizationMode =
      pdevice->vk.supported_extensions.EXT_conservative_rasterization;
   features->extendedDynamicState3ExtraPrimitiveOverestimationSize =
      pdevice->vk.supported_extensions.EXT_conservative_rasterization;
   features->extendedDynamicState3LineRasterizationMode = true;
   features->extendedDynamicState3LineStippleEnable = false;
   features->extendedDynamicState3ProvokingVertexMode = true;
   features->extendedDynamicState3SampleLocationsEnable =
      pdevice->info->a6xx.has_sample_locations;
   features->extendedDynamicState3ColorBlendEnable = true;
   features->extendedDynamicState3ColorBlendEquation = true;
   features->extendedDynamicState3ColorWriteMask = true;
   features->extendedDynamicState3ViewportWScalingEnable = false;
   features->extendedDynamicState3ViewportSwizzle = false;
   features->extendedDynamicState3ShadingRateImageEnable = false;
   features->extendedDynamicState3CoverageToColorEnable = false;
   features->extendedDynamicState3CoverageToColorLocation = false;
   features->extendedDynamicState3CoverageModulationMode = false;
   features->extendedDynamicState3CoverageModulationTableEnable = false;
   features->extendedDynamicState3CoverageModulationTable = false;
   features->extendedDynamicState3CoverageReductionMode = false;
   features->extendedDynamicState3RepresentativeFragmentTestEnable = false;
   features->extendedDynamicState3ColorBlendAdvanced = false;

   /* VK_EXT_fragment_density_map */
   features->fragmentDensityMap = true;
   features->fragmentDensityMapDynamic = false;
   features->fragmentDensityMapNonSubsampledImages = true;

   /* VK_EXT_global_priority_query */
   features->globalPriorityQuery = true;

   /* VK_EXT_graphics_pipeline_library */
   features->graphicsPipelineLibrary = true;

   /* VK_EXT_host_image_copy */
   features->hostImageCopy = true;

   /* VK_EXT_image_2d_view_of_3d */
   features->image2DViewOf3D = true;
   features->sampler2DViewOf3D = true;

   /* VK_EXT_image_view_min_lod */
   features->minLod = true;

   /* VK_EXT_legacy_vertex_attributes */
   features->legacyVertexAttributes = true;

   /* VK_EXT_legacy_dithering */
   features->legacyDithering = true;

   /* VK_EXT_map_memory_placed */
   features->memoryMapPlaced = true;
   features->memoryMapRangePlaced = false;
   features->memoryUnmapReserve = true;

   /* VK_EXT_multi_draw */
   features->multiDraw = true;

   /* VK_EXT_mutable_descriptor_type */
   features->mutableDescriptorType = true;

   /* VK_EXT_nested_command_buffer */
   features->nestedCommandBuffer = true;
   features->nestedCommandBufferRendering = true;
   features->nestedCommandBufferSimultaneousUse = true;

   /* VK_EXT_non_seamless_cube_map */
   features->nonSeamlessCubeMap = true;

   /* VK_EXT_pipeline_robustness */
   features->pipelineRobustness = true;

   /* VK_EXT_primitive_topology_list_restart */
   features->primitiveTopologyListRestart = true;
   features->primitiveTopologyPatchListRestart = false;

   /* VK_EXT_primitives_generated_query */
   features->primitivesGeneratedQuery = true;
   features->primitivesGeneratedQueryWithRasterizerDiscard = false;
   features->primitivesGeneratedQueryWithNonZeroStreams = false;

   /* VK_EXT_provoking_vertex */
   features->provokingVertexLast = true;

   /* VK_EXT_rasterization_order_attachment_access */
   features->rasterizationOrderColorAttachmentAccess = true;
   features->rasterizationOrderDepthAttachmentAccess = true;
   features->rasterizationOrderStencilAttachmentAccess = true;

   /* VK_KHR_ray_query */
   features->rayQuery = true;

   /* VK_KHR_ray_tracing_maintenance1 */
   features->rayTracingMaintenance1 = true;

   /* VK_EXT_robustness2 */
   features->robustBufferAccess2 = true;
   features->robustImageAccess2 = true;
   features->nullDescriptor = true;

   /* VK_EXT_shader_module_identifier */
   features->shaderModuleIdentifier = true;

   /* VK_EXT_shader_replicated_composites */
   features->shaderReplicatedComposites = true;

#ifdef TU_USE_WSI_PLATFORM
   /* VK_EXT_swapchain_maintenance1 */
   features->swapchainMaintenance1 = true;
#endif

   /* VK_EXT_texel_buffer_alignment */
   features->texelBufferAlignment = true;

   /* VK_EXT_transform_feedback */
   features->transformFeedback = true;
   features->geometryStreams = true;

   /* VK_EXT_vertex_input_dynamic_state */
   features->vertexInputDynamicState = true;

   /* VK_KHR_shader_relaxed_extended_instruction */
   features->shaderRelaxedExtendedInstruction = true;

   /* VK_KHR_subgroup_rotate */
   features->shaderSubgroupRotate = true;
   features->shaderSubgroupRotateClustered = true;
}
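
/*
 * A minimal sketch of how these feature bits reach an application through
 * the core API (plain Vulkan, nothing turnip-specific assumed):
 *
 *    VkPhysicalDeviceVulkan13Features feat13 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_3_FEATURES,
 *    };
 *    VkPhysicalDeviceFeatures2 feat2 = {
 *       .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
 *       .pNext = &feat13,
 *    };
 *    vkGetPhysicalDeviceFeatures2(physical_device, &feat2);
 *    // feat2.features.geometryShader and feat13.dynamicRendering are
 *    // VK_TRUE per tu_get_features() above.
 */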

static void
tu_get_physical_device_properties_1_1(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   p->deviceNodeMask = 0;
   p->deviceLUIDValid = false;

   p->subgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT |
                                    VK_SUBGROUP_FEATURE_VOTE_BIT |
                                    VK_SUBGROUP_FEATURE_BALLOT_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                                    VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                                    VK_SUBGROUP_FEATURE_ROTATE_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_ROTATE_CLUSTERED_BIT_KHR |
                                    VK_SUBGROUP_FEATURE_CLUSTERED_BIT |
                                    VK_SUBGROUP_FEATURE_ARITHMETIC_BIT;
   if (pdevice->info->a6xx.has_getfiberid) {
      p->subgroupSupportedStages |= VK_SHADER_STAGE_ALL_GRAPHICS;
      p->subgroupSupportedOperations |= VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

   p->subgroupQuadOperationsInAllStages = false;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount =
      (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   /* Our largest descriptors are 2 texture descriptors, or a texture and
    * sampler descriptor.
    */
   p->maxPerSetDescriptors = MAX_SET_SIZE / (2 * A6XX_TEX_CONST_DWORDS * 4);
   /* Our buffer size fields allow only this much */
   p->maxMemoryAllocationSize = 0xFFFFFFFFull;
}
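
/*
 * Worked example for maxPerSetDescriptors above, assuming the a6xx value
 * A6XX_TEX_CONST_DWORDS == 16: the worst-case descriptor is two 16-dword
 * entries, i.e. 2 * 16 * 4 = 128 bytes, so a set of MAX_SET_SIZE bytes
 * holds MAX_SET_SIZE / 128 such descriptors.
 */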

static const size_t max_descriptor_set_size = MAX_SET_SIZE / (4 * A6XX_TEX_CONST_DWORDS);
static const VkSampleCountFlags sample_counts =
   VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

static void
tu_get_physical_device_properties_1_2(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->driverID = VK_DRIVER_ID_MESA_TURNIP;
   memset(p->driverName, 0, sizeof(p->driverName));
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE,
            "turnip Mesa driver");
   memset(p->driverInfo, 0, sizeof(p->driverInfo));
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
   if (pdevice->info->chip >= 7) {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 4,
         .subminor = 0,
         .patch = 0,
      };
   } else {
      p->conformanceVersion = (VkConformanceVersion) {
         .major = 1,
         .minor = 2,
         .subminor = 7,
         .patch = 1,
      };
   }

   p->denormBehaviorIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;
   p->roundingModeIndependence =
      VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL;

   p->shaderDenormFlushToZeroFloat16         = true;
   p->shaderDenormPreserveFloat16            = false;
   p->shaderRoundingModeRTEFloat16           = true;
   p->shaderRoundingModeRTZFloat16           = false;
   p->shaderSignedZeroInfNanPreserveFloat16  = true;

   p->shaderDenormFlushToZeroFloat32         = true;
   p->shaderDenormPreserveFloat32            = false;
   p->shaderRoundingModeRTEFloat32           = true;
   p->shaderRoundingModeRTZFloat32           = false;
   p->shaderSignedZeroInfNanPreserveFloat32  = true;

   p->shaderDenormFlushToZeroFloat64         = false;
   p->shaderDenormPreserveFloat64            = false;
   p->shaderRoundingModeRTEFloat64           = false;
   p->shaderRoundingModeRTZFloat64           = false;
   p->shaderSignedZeroInfNanPreserveFloat64  = false;

   p->shaderUniformBufferArrayNonUniformIndexingNative   = true;
   p->shaderSampledImageArrayNonUniformIndexingNative    = true;
   p->shaderStorageBufferArrayNonUniformIndexingNative   = true;
   p->shaderStorageImageArrayNonUniformIndexingNative    = true;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind                  = false;
   p->quadDivergentImplicitLod                           = false;

   p->maxUpdateAfterBindDescriptorsInAllPools            = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSamplers       = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages  = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_RTS;
   p->maxPerStageUpdateAfterBindResources                = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers            = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers      = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages       = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments    = MAX_RTS;

   p->supportedDepthResolveModes    = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->supportedStencilResolveModes  = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT;
   p->independentResolveNone  = false;
   p->independentResolve      = false;

   p->filterMinmaxSingleComponentFormats  = true;
   p->filterMinmaxImageComponentMapping   = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = sample_counts;
}

static void
tu_get_physical_device_properties_1_3(struct tu_physical_device *pdevice,
                                      struct vk_properties *p)
{
   p->minSubgroupSize = pdevice->info->threadsize_base;
   p->maxSubgroupSize = pdevice->info->a6xx.supports_double_threadsize ?
      pdevice->info->threadsize_base * 2 : pdevice->info->threadsize_base;
   p->maxComputeWorkgroupSubgroups = pdevice->info->max_waves;
   p->requiredSubgroupSizeStages = VK_SHADER_STAGE_ALL;

   p->maxInlineUniformBlockSize = MAX_INLINE_UBO_RANGE;
   p->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UBOS;
   p->maxInlineUniformTotalSize = MAX_INLINE_UBOS * MAX_INLINE_UBO_RANGE;

   p->integerDotProduct8BitUnsignedAccelerated = false;
   p->integerDotProduct8BitSignedAccelerated = false;
   p->integerDotProduct8BitMixedSignednessAccelerated = false;
   p->integerDotProduct4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate 4x8BitPackedSigned fast enough */
   p->integerDotProduct4x8BitPackedSignedAccelerated = false;
   p->integerDotProduct4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProduct16BitUnsignedAccelerated = false;
   p->integerDotProduct16BitSignedAccelerated = false;
   p->integerDotProduct16BitMixedSignednessAccelerated = false;
   p->integerDotProduct32BitUnsignedAccelerated = false;
   p->integerDotProduct32BitSignedAccelerated = false;
   p->integerDotProduct32BitMixedSignednessAccelerated = false;
   p->integerDotProduct64BitUnsignedAccelerated = false;
   p->integerDotProduct64BitSignedAccelerated = false;
   p->integerDotProduct64BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   /* TODO: we should be able to emulate Saturating4x8BitPackedSigned fast enough */
   p->integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated =
      pdevice->info->a6xx.has_dp2acc;
   p->integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitSignedAccelerated = false;
   p->integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false;

   p->storageTexelBufferOffsetAlignmentBytes = 64;
   p->storageTexelBufferOffsetSingleTexelAlignment = true;
   p->uniformTexelBufferOffsetAlignmentBytes = 64;
   p->uniformTexelBufferOffsetSingleTexelAlignment = true;

   /* The address space is 4GB for current kernels, so there's no point
    * allowing a larger buffer. Our buffer sizes are 64-bit though, so
    * GetBufferDeviceRequirements won't fall over if someone actually creates
    * a 4GB buffer.
    */
   p->maxBufferSize = 1ull << 32;
}
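
/*
 * Worked example for the subgroup limits above (hypothetical a6xx values):
 * with threadsize_base = 64, minSubgroupSize is 64 and maxSubgroupSize is
 * 64 * 2 = 128 when supports_double_threadsize is set, else also 64.
 */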

/* CP_ALWAYS_ON_COUNTER runs at a fixed 19.2 MHz */
#define ALWAYS_ON_FREQUENCY 19200000
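
/* One tick of that counter is 1e9 / 19200000 ns, so the timestampPeriod
 * reported below works out to roughly 52.08 ns.
 */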
948 
949 static void
tu_get_properties(struct tu_physical_device * pdevice,struct vk_properties * props)950 tu_get_properties(struct tu_physical_device *pdevice,
951                   struct vk_properties *props)
952 {
953    /* Limits */
954    props->maxImageDimension1D = (1 << 14);
955    props->maxImageDimension2D = (1 << 14);
956    props->maxImageDimension3D = (1 << 11);
957    props->maxImageDimensionCube = (1 << 14);
958    props->maxImageArrayLayers = (1 << 11);
959    props->maxTexelBufferElements = MAX_TEXEL_ELEMENTS;
960    props->maxUniformBufferRange = MAX_UNIFORM_BUFFER_RANGE;
961    props->maxStorageBufferRange = MAX_STORAGE_BUFFER_RANGE;
962    props->maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE;
963    props->maxMemoryAllocationCount = UINT32_MAX;
964    props->maxSamplerAllocationCount = 64 * 1024;
965    props->bufferImageGranularity = 64;          /* A cache line */
966    props->sparseAddressSpaceSize = 0;
967    props->maxBoundDescriptorSets = pdevice->usable_sets;
968    props->maxPerStageDescriptorSamplers = max_descriptor_set_size;
969    props->maxPerStageDescriptorUniformBuffers = max_descriptor_set_size;
970    props->maxPerStageDescriptorStorageBuffers = max_descriptor_set_size;
971    props->maxPerStageDescriptorSampledImages = max_descriptor_set_size;
972    props->maxPerStageDescriptorStorageImages = max_descriptor_set_size;
973    props->maxPerStageDescriptorInputAttachments = MAX_RTS;
974    props->maxPerStageResources = max_descriptor_set_size;
975    props->maxDescriptorSetSamplers = max_descriptor_set_size;
976    props->maxDescriptorSetUniformBuffers = max_descriptor_set_size;
977    props->maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
978    props->maxDescriptorSetStorageBuffers = max_descriptor_set_size;
979    props->maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
980    props->maxDescriptorSetSampledImages = max_descriptor_set_size;
981    props->maxDescriptorSetStorageImages = max_descriptor_set_size;
982    props->maxDescriptorSetInputAttachments = MAX_RTS;
983    props->maxVertexInputAttributes = pdevice->info->a6xx.vs_max_inputs_count;
984    props->maxVertexInputBindings = pdevice->info->a6xx.vs_max_inputs_count;
985    props->maxVertexInputAttributeOffset = 4095;
986    props->maxVertexInputBindingStride = 2048;
987    props->maxVertexOutputComponents = 128;
988    props->maxTessellationGenerationLevel = 64;
989    props->maxTessellationPatchSize = 32;
990    props->maxTessellationControlPerVertexInputComponents = 128;
991    props->maxTessellationControlPerVertexOutputComponents = 128;
992    props->maxTessellationControlPerPatchOutputComponents = 120;
993    props->maxTessellationControlTotalOutputComponents = 4096;
994    props->maxTessellationEvaluationInputComponents = 128;
995    props->maxTessellationEvaluationOutputComponents = 128;
996    props->maxGeometryShaderInvocations = 32;
997    props->maxGeometryInputComponents = 64;
998    props->maxGeometryOutputComponents = 128;
999    props->maxGeometryOutputVertices = 256;
1000    props->maxGeometryTotalOutputComponents = 1024;
1001    props->maxFragmentInputComponents = 124;
1002    props->maxFragmentOutputAttachments = 8;
1003    props->maxFragmentDualSrcAttachments = 1;
1004    props->maxFragmentCombinedOutputResources = MAX_RTS + max_descriptor_set_size * 2;
1005    props->maxComputeSharedMemorySize = pdevice->info->cs_shared_mem_size;
1006    props->maxComputeWorkGroupCount[0] =
1007       props->maxComputeWorkGroupCount[1] =
1008       props->maxComputeWorkGroupCount[2] = 65535;
1009    props->maxComputeWorkGroupInvocations = pdevice->info->a6xx.supports_double_threadsize ?
1010       pdevice->info->threadsize_base * 2 * pdevice->info->max_waves :
1011       pdevice->info->threadsize_base * pdevice->info->max_waves;
1012    props->maxComputeWorkGroupSize[0] =
1013       props->maxComputeWorkGroupSize[1] =
1014       props->maxComputeWorkGroupSize[2] = 1024;
1015    props->subPixelPrecisionBits = 8;
1016    props->subTexelPrecisionBits = 8;
1017    props->mipmapPrecisionBits = 8;
1018    props->maxDrawIndexedIndexValue = UINT32_MAX;
1019    props->maxDrawIndirectCount = UINT32_MAX;
1020    props->maxSamplerLodBias = 4095.0 / 256.0; /* [-16, 15.99609375] */
1021    props->maxSamplerAnisotropy = 16;
1022    props->maxViewports =
1023          (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ? MAX_VIEWPORTS : 1;
1024    props->maxViewportDimensions[0] =
1025       props->maxViewportDimensions[1] = MAX_VIEWPORT_SIZE;
1026    props->viewportBoundsRange[0] = INT16_MIN;
1027    props->viewportBoundsRange[1] = INT16_MAX;
1028    props->viewportSubPixelBits = 8;
1029    props->minMemoryMapAlignment = 4096; /* A page */
1030    props->minTexelBufferOffsetAlignment = 64;
1031    props->minUniformBufferOffsetAlignment = 64;
1032    props->minStorageBufferOffsetAlignment = 4;
1033    props->minTexelOffset = -16;
1034    props->maxTexelOffset = 15;
1035    props->minTexelGatherOffset = -32;
1036    props->maxTexelGatherOffset = 31;
1037    props->minInterpolationOffset = -0.5;
1038    props->maxInterpolationOffset = 0.4375;
1039    props->subPixelInterpolationOffsetBits = 4;
1040    props->maxFramebufferWidth = (1 << 14);
1041    props->maxFramebufferHeight = (1 << 14);
1042    props->maxFramebufferLayers = (1 << 10);
1043    props->framebufferColorSampleCounts = sample_counts;
1044    props->framebufferDepthSampleCounts = sample_counts;
1045    props->framebufferStencilSampleCounts = sample_counts;
1046    props->framebufferNoAttachmentsSampleCounts = sample_counts;
1047    props->maxColorAttachments = MAX_RTS;
1048    props->sampledImageColorSampleCounts = sample_counts;
1049    props->sampledImageIntegerSampleCounts = sample_counts;
1050    props->sampledImageDepthSampleCounts = sample_counts;
1051    props->sampledImageStencilSampleCounts = sample_counts;
1052    props->storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT;
1053    props->maxSampleMaskWords = 1;
1054    props->timestampComputeAndGraphics = true;
1055    props->timestampPeriod = 1000000000.0 / (float) ALWAYS_ON_FREQUENCY;
1056    props->maxClipDistances = 8;
1057    props->maxCullDistances = 8;
1058    props->maxCombinedClipAndCullDistances = 8;
1059    props->discreteQueuePriorities = 2;
1060    props->pointSizeRange[0] = 1;
1061    props->pointSizeRange[1] = 4092;
1062    props->lineWidthRange[0] = pdevice->info->a6xx.line_width_min;
1063    props->lineWidthRange[1] = pdevice->info->a6xx.line_width_max;
1064    props->pointSizeGranularity = 	0.0625;
1065    props->lineWidthGranularity =
1066       pdevice->info->a6xx.line_width_max == 1.0 ? 0.0 : 0.5;
1067    props->strictLines = true;
1068    props->standardSampleLocations = true;
1069    props->optimalBufferCopyOffsetAlignment = 128;
1070    props->optimalBufferCopyRowPitchAlignment = 128;
1071    props->nonCoherentAtomSize = 64;
1072 
1073    props->apiVersion =
1074       (pdevice->info->a6xx.has_hw_multiview || TU_DEBUG(NOCONFORM)) ?
1075          ((pdevice->info->chip >= 7) ? TU_API_VERSION :
1076             VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION))
1077          : VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION);
1078    props->driverVersion = vk_get_driver_version();
1079    props->vendorID = 0x5143;
1080    props->deviceID = pdevice->dev_id.chip_id;
1081    props->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
1082 
1083    /* Vulkan 1.4 */
1084    props->dynamicRenderingLocalReadDepthStencilAttachments = true;
1085    props->dynamicRenderingLocalReadMultisampledAttachments = true;
1086 
1087    /* sparse properties */
1088    props->sparseResidencyStandard2DBlockShape = { 0 };
1089    props->sparseResidencyStandard2DMultisampleBlockShape = { 0 };
1090    props->sparseResidencyStandard3DBlockShape = { 0 };
1091    props->sparseResidencyAlignedMipSize = { 0 };
1092    props->sparseResidencyNonResidentStrict = { 0 };
1093 
1094    strcpy(props->deviceName, pdevice->name);
1095    memcpy(props->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
1096 
1097    tu_get_physical_device_properties_1_1(pdevice, props);
1098    tu_get_physical_device_properties_1_2(pdevice, props);
1099    tu_get_physical_device_properties_1_3(pdevice, props);
1100 
1101    /* VK_KHR_compute_shader_derivatives */
1102    props->meshAndTaskShaderDerivatives = false;
1103 
1104    /* VK_KHR_fragment_shading_rate */
1105    if (pdevice->info->a6xx.has_attachment_shading_rate) {
1106       props->minFragmentShadingRateAttachmentTexelSize = {8, 8};
1107       props->maxFragmentShadingRateAttachmentTexelSize = {8, 8};
1108    } else {
1109       props->minFragmentShadingRateAttachmentTexelSize = {0, 0};
1110       props->maxFragmentShadingRateAttachmentTexelSize = {0, 0};
1111    }
1112    props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
1113    props->primitiveFragmentShadingRateWithMultipleViewports =
1114       pdevice->info->a7xx.has_primitive_shading_rate;
1115    /* A7XX TODO: dEQP-VK.fragment_shading_rate.*.srlayered.* are failing
1116     * for some reason.
1117     */
1118    props->layeredShadingRateAttachments = false;
1119    props->fragmentShadingRateNonTrivialCombinerOps = true;
1120    props->maxFragmentSize = {4, 4};
1121    props->maxFragmentSizeAspectRatio = 4;
1122    props->maxFragmentShadingRateCoverageSamples = 16;
1123    props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_4_BIT;
1124    props->fragmentShadingRateWithShaderDepthStencilWrites = true;
1125    props->fragmentShadingRateWithSampleMask = true;
1126    /* Has wrong gl_SampleMaskIn[0] values with VK_EXT_post_depth_coverage used. */
   props->fragmentShadingRateWithShaderSampleMask = false;
   props->fragmentShadingRateWithConservativeRasterization = true;
   props->fragmentShadingRateWithFragmentShaderInterlock = false;
   props->fragmentShadingRateWithCustomSampleLocations = true;
   props->fragmentShadingRateStrictMultiplyCombiner = true;

   /* VK_KHR_push_descriptor */
   props->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;

   /* VK_EXT_transform_feedback */
   props->maxTransformFeedbackStreams = IR3_MAX_SO_STREAMS;
   props->maxTransformFeedbackBuffers = IR3_MAX_SO_BUFFERS;
   props->maxTransformFeedbackBufferSize = UINT32_MAX;
   props->maxTransformFeedbackStreamDataSize = 512;
   props->maxTransformFeedbackBufferDataSize = 512;
   props->maxTransformFeedbackBufferDataStride = 512;
   props->transformFeedbackQueries = true;
   props->transformFeedbackStreamsLinesTriangles = true;
   props->transformFeedbackRasterizationStreamSelect = true;
   props->transformFeedbackDraw = true;

   /* VK_EXT_sample_locations */
   props->sampleLocationSampleCounts =
      pdevice->vk.supported_extensions.EXT_sample_locations ? sample_counts : 0;
   props->maxSampleLocationGridSize = (VkExtent2D) { 1, 1 };
   props->sampleLocationCoordinateRange[0] = SAMPLE_LOCATION_MIN;
   props->sampleLocationCoordinateRange[1] = SAMPLE_LOCATION_MAX;
   props->sampleLocationSubPixelBits = 4;
   props->variableSampleLocations = true;

   /* VK_KHR_vertex_attribute_divisor */
   props->maxVertexAttribDivisor = UINT32_MAX;
   props->supportsNonZeroFirstInstance = true;

   /* VK_EXT_custom_border_color */
   props->maxCustomBorderColorSamplers = TU_BORDER_COLOR_COUNT;

   /* VK_KHR_performance_query */
   props->allowCommandBufferQueryCopies = false;

   /* VK_EXT_robustness2 */
   /* see write_buffer_descriptor() */
   props->robustStorageBufferAccessSizeAlignment = 4;
   /* see write_ubo_descriptor() */
   props->robustUniformBufferAccessSizeAlignment = 16;

   /* VK_EXT_pipeline_robustness */
   props->defaultRobustnessStorageBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessUniformBuffers = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_EXT;
   props->defaultRobustnessVertexInputs = VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_ROBUST_BUFFER_ACCESS_2_EXT;
   props->defaultRobustnessImages = VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_ROBUST_IMAGE_ACCESS_2_EXT;

   /* VK_EXT_provoking_vertex */
   props->provokingVertexModePerPipeline = true;
   props->transformFeedbackPreservesTriangleFanProvokingVertex = false;

   /* VK_KHR_line_rasterization */
   props->lineSubPixelPrecisionBits = 8;

   /* VK_EXT_physical_device_drm */
   props->drmHasPrimary = pdevice->has_master;
   props->drmPrimaryMajor = pdevice->master_major;
   props->drmPrimaryMinor = pdevice->master_minor;

   props->drmHasRender = pdevice->has_local;
   props->drmRenderMajor = pdevice->local_major;
   props->drmRenderMinor = pdevice->local_minor;

   /* VK_EXT_shader_module_identifier */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(props->shaderModuleIdentifierAlgorithmUUID));
   memcpy(props->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(props->shaderModuleIdentifierAlgorithmUUID));

   /* VK_EXT_map_memory_placed */
   os_get_page_size(&os_page_size);
   props->minPlacedMemoryMapAlignment = os_page_size;

   /* VK_EXT_multi_draw */
   props->maxMultiDrawCount = 2048;

   /* VK_EXT_nested_command_buffer */
   props->maxCommandBufferNestingLevel = UINT32_MAX;

   /* VK_EXT_graphics_pipeline_library */
   props->graphicsPipelineLibraryFastLinking = true;
   props->graphicsPipelineLibraryIndependentInterpolationDecoration = true;

   /* VK_EXT_extended_dynamic_state3 */
   props->dynamicPrimitiveTopologyUnrestricted = true;

   /* VK_EXT_descriptor_buffer */
   props->combinedImageSamplerDescriptorSingleArray = true;
   props->bufferlessPushDescriptors = true;
   props->allowSamplerImageViewPostSubmitCreation = true;
   props->descriptorBufferOffsetAlignment = A6XX_TEX_CONST_DWORDS * 4;
   props->maxDescriptorBufferBindings = pdevice->usable_sets;
   props->maxResourceDescriptorBufferBindings = pdevice->usable_sets;
   props->maxSamplerDescriptorBufferBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplerBindings = pdevice->usable_sets;
   props->maxEmbeddedImmutableSamplers = max_descriptor_set_size;
   props->bufferCaptureReplayDescriptorDataSize = 0;
   props->imageCaptureReplayDescriptorDataSize = 0;
   props->imageViewCaptureReplayDescriptorDataSize = 0;
   props->samplerCaptureReplayDescriptorDataSize = 0;
   props->accelerationStructureCaptureReplayDescriptorDataSize = 0;
   /* Note: these sizes must match descriptor_size() */
   props->samplerDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->combinedImageSamplerDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;
   props->sampledImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageImageDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->storageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustStorageTexelBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->uniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->robustUniformBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
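   /* Storage buffer descriptors may carry extra texture descriptors: one
    * more for 16-bit access when storage_16bit is set but isam.v is not
    * available, and one more for 8-bit access on a7xx.
    */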
   props->storageBufferDescriptorSize = A6XX_TEX_CONST_DWORDS * 4 * (1 +
      COND(pdevice->info->a6xx.storage_16bit && !pdevice->info->a6xx.has_isam_v, 1) +
      COND(pdevice->info->a7xx.storage_8bit, 1));
   props->robustStorageBufferDescriptorSize =
      props->storageBufferDescriptorSize;
   props->accelerationStructureDescriptorSize = 4 * A6XX_TEX_CONST_DWORDS;
   props->inputAttachmentDescriptorSize = A6XX_TEX_CONST_DWORDS * 4;
   props->maxSamplerDescriptorBufferRange = ~0ull;
   props->maxResourceDescriptorBufferRange = ~0ull;
   props->samplerDescriptorBufferAddressSpaceSize = ~0ull;
   props->resourceDescriptorBufferAddressSpaceSize = ~0ull;
   props->descriptorBufferAddressSpaceSize = ~0ull;
   props->combinedImageSamplerDensityMapDescriptorSize = 2 * A6XX_TEX_CONST_DWORDS * 4;

   /* VK_EXT_legacy_vertex_attributes */
   props->nativeUnalignedPerformance = true;

   /* VK_EXT_fragment_density_map */
   props->minFragmentDensityTexelSize = (VkExtent2D) { MIN_FDM_TEXEL_SIZE, MIN_FDM_TEXEL_SIZE };
   props->maxFragmentDensityTexelSize = (VkExtent2D) { MAX_FDM_TEXEL_SIZE, MAX_FDM_TEXEL_SIZE };
   props->fragmentDensityInvocations = false;

   /* VK_KHR_maintenance5 */
   props->earlyFragmentMultisampleCoverageAfterSampleCounting = true;
   props->earlyFragmentSampleMaskTestBeforeSampleCounting = true;
   props->depthStencilSwizzleOneSupport = true;
   props->polygonModePointSize = true;
   props->nonStrictWideLinesUseParallelogram = false;
   props->nonStrictSinglePixelWideLinesUseParallelogram = false;

   /* VK_KHR_maintenance6 */
   props->blockTexelViewCompatibleMultipleLayers = true;
   props->maxCombinedImageSamplerDescriptorCount = 1;
   props->fragmentShadingRateClampCombinerInputs = true;

   /* VK_EXT_host_image_copy */

   /* We don't use the layouts ATM so just report all layouts from
    * extensions that we support as compatible.
    */
   static const VkImageLayout supported_layouts[] = {
      VK_IMAGE_LAYOUT_GENERAL, /* required by spec */
      VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
      VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
      VK_IMAGE_LAYOUT_PREINITIALIZED,
      VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_STENCIL_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_READ_ONLY_OPTIMAL,
      VK_IMAGE_LAYOUT_ATTACHMENT_OPTIMAL,
      VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT,
      VK_IMAGE_LAYOUT_ATTACHMENT_FEEDBACK_LOOP_OPTIMAL_EXT,
   };

   props->pCopySrcLayouts = (VkImageLayout *)supported_layouts;
   props->copySrcLayoutCount = ARRAY_SIZE(supported_layouts);
   props->pCopyDstLayouts = (VkImageLayout *)supported_layouts;
   props->copyDstLayoutCount = ARRAY_SIZE(supported_layouts);

   /* We're a UMA platform so we can always map every kind of memory */
   props->identicalMemoryTypeRequirements = true;

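   /* optimalTilingLayoutUUID lets apps match host-copyable image layouts
    * across devices, so it has to change whenever anything that affects our
    * tiling does: hash a driver tag plus the UBWC configuration.
    */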
   {
      struct mesa_sha1 sha1_ctx;
      uint8_t sha1[20];

      _mesa_sha1_init(&sha1_ctx);

      /* Make sure we don't match with other vendors */
      const char *driver = "turnip-v1";
      _mesa_sha1_update(&sha1_ctx, driver, strlen(driver));

      /* Hash in UBWC configuration */
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.highest_bank_bit,
                        sizeof(pdevice->ubwc_config.highest_bank_bit));
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.bank_swizzle_levels,
                        sizeof(pdevice->ubwc_config.bank_swizzle_levels));
      _mesa_sha1_update(&sha1_ctx, &pdevice->ubwc_config.macrotile_mode,
                        sizeof(pdevice->ubwc_config.macrotile_mode));

      _mesa_sha1_final(&sha1_ctx, sha1);

      memcpy(props->optimalTilingLayoutUUID, sha1, VK_UUID_SIZE);
   }

   /* VK_KHR_acceleration_structure */
   props->maxGeometryCount = (1 << 24) - 1;
   props->maxInstanceCount = (1 << 24) - 1;
   props->maxPrimitiveCount = (1 << 29) - 1;
   props->maxPerStageDescriptorAccelerationStructures = max_descriptor_set_size;
   props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
   props->maxDescriptorSetAccelerationStructures = max_descriptor_set_size;
   props->maxDescriptorSetUpdateAfterBindAccelerationStructures = max_descriptor_set_size;
   props->minAccelerationStructureScratchOffsetAlignment = 128;

   /* VK_EXT_conservative_rasterization */
   props->primitiveOverestimationSize = 0.5 + 1 / 256.;
   props->maxExtraPrimitiveOverestimationSize = 0.5;
   props->extraPrimitiveOverestimationSizeGranularity = 0.5;
   props->primitiveUnderestimation = false;
   props->conservativePointAndLineRasterization = false;
   props->degenerateTrianglesRasterized = true;
   props->degenerateLinesRasterized = false;
   props->fullyCoveredFragmentShaderInputVariable = false;
   props->conservativeRasterizationPostDepthCoverage = false;
}

static const struct vk_pipeline_cache_object_ops *const cache_import_ops[] = {
   &tu_shader_ops,
   &tu_nir_shaders_ops,
   NULL,
};

VkResult
tu_physical_device_init(struct tu_physical_device *device,
                        struct tu_instance *instance)
{
   VkResult result = VK_SUCCESS;

   const char *fd_name = fd_dev_name(&device->dev_id);
   if (!fd_name) {
      return vk_startup_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER,
                               "device (chip_id = %" PRIX64
                               ", gpu_id = %u) is unsupported",
                               device->dev_id.chip_id, device->dev_id.gpu_id);
   }

   const struct fd_dev_info info = fd_dev_info(&device->dev_id);
   assert(info.chip);

   /* Print a suffix if raytracing is disabled by the SW fuse, in an attempt
    * to avoid confusion when apps don't work.
    */
   bool raytracing_disabled = info.a7xx.has_sw_fuse &&
      !device->has_raytracing;
   const char *rt_suffix = raytracing_disabled ? " (raytracing disabled)" : "";

   if (strncmp(fd_name, "FD", 2) == 0) {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "Turnip Adreno (TM) %s%s", &fd_name[2],
                                 rt_suffix);
   } else {
      device->name = vk_asprintf(&instance->vk.alloc,
                                 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE,
                                 "%s%s", fd_name, rt_suffix);
   }
   if (!device->name) {
      return vk_startup_errorf(instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                               "device name alloc fail");
   }

   switch (fd_dev_gen(&device->dev_id)) {
   case 6:
   case 7: {
      device->dev_info = info;
      device->info = &device->dev_info;
      uint32_t depth_cache_size =
         device->info->num_ccu * device->info->a6xx.sysmem_per_ccu_depth_cache_size;
      uint32_t color_cache_size =
         (device->info->num_ccu *
          device->info->a6xx.sysmem_per_ccu_color_cache_size);
      uint32_t color_cache_size_gmem =
         color_cache_size /
         (1 << device->info->a6xx.gmem_ccu_color_cache_fraction);

      device->ccu_depth_offset_bypass = 0;
      device->ccu_offset_bypass =
         device->ccu_depth_offset_bypass + depth_cache_size;

      if (device->info->a7xx.has_gmem_vpc_attr_buf) {
         device->vpc_attr_buf_size_bypass =
            device->info->a7xx.sysmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_bypass =
            device->ccu_offset_bypass + color_cache_size;

         device->vpc_attr_buf_size_gmem =
            device->info->a7xx.gmem_vpc_attr_buf_size;
         device->vpc_attr_buf_offset_gmem =
            device->gmem_size -
            (device->vpc_attr_buf_size_gmem * device->info->num_ccu);

         device->ccu_offset_gmem =
            device->vpc_attr_buf_offset_gmem - color_cache_size_gmem;

         device->usable_gmem_size_gmem = device->vpc_attr_buf_offset_gmem;
      } else {
         device->ccu_offset_gmem = device->gmem_size - color_cache_size_gmem;
         device->usable_gmem_size_gmem = device->gmem_size;
      }

      if (instance->reserve_descriptor_set) {
         device->usable_sets = device->reserved_set_idx = device->info->a6xx.max_sets - 1;
      } else {
         device->usable_sets = device->info->a6xx.max_sets;
         device->reserved_set_idx = -1;
      }
      break;
   }
   default:
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "device %s is unsupported", device->name);
      goto fail_free_name;
   }
   if (tu_device_get_cache_uuid(device, device->cache_uuid)) {
      result = vk_startup_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                                 "cannot generate UUID");
      goto fail_free_name;
   }

   device->level1_dcache_size = tu_get_l1_dcache_size();
   device->has_cached_non_coherent_memory =
      device->level1_dcache_size > 0 && !DETECT_ARCH_ARM;

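   /* The base memory type is host-coherent but not host-cached; cached
    * variants (coherent and non-coherent) are only appended when the device
    * supports them.
    */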
   device->memory.type_count = 1;
   device->memory.types[0] =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;

   if (device->has_cached_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   if (device->has_cached_non_coherent_memory) {
      device->memory.types[device->memory.type_count] =
         VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
         VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
         VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
      device->memory.type_count++;
   }

   /* Provide fallback UBWC config values if the kernel doesn't support
    * providing them. This should match what the kernel programs.
    */
   if (!device->ubwc_config.highest_bank_bit) {
      device->ubwc_config.highest_bank_bit = info.highest_bank_bit;
   }
   if (device->ubwc_config.bank_swizzle_levels == ~0) {
      device->ubwc_config.bank_swizzle_levels = info.ubwc_swizzle;
   }
   if (device->ubwc_config.macrotile_mode == FDL_MACROTILE_INVALID) {
      device->ubwc_config.macrotile_mode =
         (enum fdl_macrotile_mode) info.macrotile_mode;
   }

   fd_get_driver_uuid(device->driver_uuid);
   fd_get_device_uuid(device->device_uuid, &device->dev_id);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk,
                                    NULL, NULL, NULL, /* We set up extensions later */
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto fail_free_name;

   get_device_extensions(device, &device->vk.supported_extensions);
   tu_get_features(device, &device->vk.supported_features);
   tu_get_properties(device, &device->vk.properties);

   device->vk.supported_sync_types = device->sync_types;

#ifdef TU_USE_WSI_PLATFORM
   result = tu_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_startup_errorf(instance, result, "WSI init failure");
      vk_physical_device_finish(&device->vk);
      goto fail_free_name;
   }
#endif

   /* The gpu id is already embedded in the uuid so we just pass "tu"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   mesa_bytes_to_hex(buf, device->cache_uuid, VK_UUID_SIZE);
   device->vk.disk_cache = disk_cache_create(device->name, buf, 0);

   device->vk.pipeline_cache_import_ops = cache_import_ops;

   return VK_SUCCESS;

fail_free_name:
   vk_free(&instance->vk.alloc, (void *)device->name);
   return result;
}

static void
tu_physical_device_finish(struct tu_physical_device *device)
{
#ifdef TU_USE_WSI_PLATFORM
   tu_wsi_finish(device);
#endif

   close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);

   if (device->kgsl_dma_fd != -1)
      close(device->kgsl_dma_fd);

   disk_cache_destroy(device->vk.disk_cache);
   vk_free(&device->instance->vk.alloc, (void *)device->name);

   vk_physical_device_finish(&device->vk);
}

static void
tu_destroy_physical_device(struct vk_physical_device *device)
{
   tu_physical_device_finish((struct tu_physical_device *) device);
   vk_free(&device->instance->alloc, device);
}

static const driOptionDescription tu_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_KHR_PRESENT_WAIT(false)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_VK_XWAYLAND_WAIT_READY(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_VK_WSI_FORCE_SWAPCHAIN_TO_CURRENT_EXTENT(false)
      DRI_CONF_VK_X11_IGNORE_SUBOPTIMAL(false)
      DRI_CONF_VK_DONT_CARE_AS_LOAD(false)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_MISCELLANEOUS
      DRI_CONF_DISABLE_CONSERVATIVE_LRZ(false)
      DRI_CONF_TU_DONT_RESERVE_DESCRIPTOR_SET(false)
      DRI_CONF_TU_ALLOW_OOB_INDIRECT_UBO_LOADS(false)
      DRI_CONF_TU_DISABLE_D24S8_BORDER_COLOR_WORKAROUND(false)
   DRI_CONF_SECTION_END
};

static void
tu_init_dri_options(struct tu_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, tu_dri_options,
                      ARRAY_SIZE(tu_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "turnip", NULL, NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->dont_care_as_load =
         driQueryOptionb(&instance->dri_options, "vk_dont_care_as_load");
   instance->conservative_lrz =
         !driQueryOptionb(&instance->dri_options, "disable_conservative_lrz");
   instance->reserve_descriptor_set =
         !driQueryOptionb(&instance->dri_options, "tu_dont_reserve_descriptor_set");
   instance->allow_oob_indirect_ubo_loads =
         driQueryOptionb(&instance->dri_options, "tu_allow_oob_indirect_ubo_loads");
   instance->disable_d24s8_border_color_workaround =
         driQueryOptionb(&instance->dri_options, "tu_disable_d24s8_border_color_workaround");
}

static uint32_t instance_count = 0;

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator,
                  VkInstance *pInstance)
{
   struct tu_instance *instance;
   VkResult result;

   tu_env_init();

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = (struct tu_instance *) vk_zalloc(
      pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &tu_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &tu_instance_extensions_supported,
                             &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   instance->vk.physical_devices.try_create_for_drm =
      tu_physical_device_try_create;
   instance->vk.physical_devices.enumerate = tu_enumerate_devices;
   instance->vk.physical_devices.destroy = tu_destroy_physical_device;

   instance->instance_idx = p_atomic_fetch_add(&instance_count, 1);
   if (TU_DEBUG(STARTUP))
      mesa_logi("Created an instance");

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   tu_init_dri_options(instance);

   *pInstance = tu_instance_to_handle(instance);

#ifdef HAVE_PERFETTO
   tu_perfetto_init();
#endif

   util_gpuvis_init();

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyInstance(VkInstance _instance,
                   const VkAllocationCallbacks *pAllocator)
{
   VK_FROM_HANDLE(tu_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static const VkQueueFamilyProperties tu_queue_family_properties = {
   .queueFlags =
      VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT,
   .queueCount = 1,
   .timestampValidBits = 48,
   .minImageTransferGranularity = { 1, 1, 1 },
};

void
tu_physical_device_get_global_priority_properties(const struct tu_physical_device *pdevice,
                                                  VkQueueFamilyGlobalPriorityPropertiesKHR *props)
{
   props->priorityCount = MIN2(pdevice->submitqueue_priority_count, 3);
   switch (props->priorityCount) {
   case 1:
      props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
      break;
   case 2:
      props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
      props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
      break;
   case 3:
      props->priorities[0] = VK_QUEUE_GLOBAL_PRIORITY_LOW_KHR;
      props->priorities[1] = VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_KHR;
      props->priorities[2] = VK_QUEUE_GLOBAL_PRIORITY_HIGH_KHR;
      break;
   default:
      unreachable("unexpected priority count");
      break;
   }
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);

   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
                          pQueueFamilyProperties, pQueueFamilyPropertyCount);

   vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p)
   {
      p->queueFamilyProperties = tu_queue_family_properties;

      vk_foreach_struct(ext, p->pNext) {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_KHR: {
            VkQueueFamilyGlobalPriorityPropertiesKHR *props =
               (VkQueueFamilyGlobalPriorityPropertiesKHR *) ext;
            tu_physical_device_get_global_priority_properties(pdevice, props);
            break;
         }
         default:
            break;
         }
      }
   }
}

uint64_t
tu_get_system_heap_size(struct tu_physical_device *physical_device)
{
   uint64_t total_ram = 0;
   ASSERTED bool has_physical_memory =
      os_get_total_physical_memory(&total_ram);
   assert(has_physical_memory);

   /* We don't want to burn too much ram with the GPU.  If the user has 4GiB
    * or less, we use at most half.  If they have more than 4GiB, we use 3/4.
    */
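   /* e.g. a 4 GiB system yields a 2 GiB heap, an 8 GiB system a 6 GiB one. */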
   uint64_t available_ram;
   if (total_ram <= 4ull * 1024ull * 1024ull * 1024ull)
      available_ram = total_ram / 2;
   else
      available_ram = total_ram * 3 / 4;

   if (physical_device->va_size)
      available_ram = MIN2(available_ram, physical_device->va_size);

   return available_ram;
}

static VkDeviceSize
tu_get_budget_memory(struct tu_physical_device *physical_device)
{
   uint64_t heap_size = physical_device->heap.size;
   uint64_t heap_used = physical_device->heap.used;
   uint64_t sys_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   if (physical_device->va_size)
      sys_available = MIN2(sys_available, physical_device->va_size);

   /*
    * Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
   uint64_t heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice pdev,
                                      VkPhysicalDeviceMemoryProperties2 *props2)
{
   VK_FROM_HANDLE(tu_physical_device, physical_device, pdev);

   VkPhysicalDeviceMemoryProperties *props = &props2->memoryProperties;
   props->memoryHeapCount = 1;
   props->memoryHeaps[0].size = physical_device->heap.size;
   props->memoryHeaps[0].flags = physical_device->heap.flags;

   props->memoryTypeCount = physical_device->memory.type_count;
   for (uint32_t i = 0; i < physical_device->memory.type_count; i++) {
      props->memoryTypes[i] = (VkMemoryType) {
         .propertyFlags = physical_device->memory.types[i],
         .heapIndex     = 0,
      };
   }

   vk_foreach_struct(ext, props2->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget_props =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
         memory_budget_props->heapUsage[0] = physical_device->heap.used;
         memory_budget_props->heapBudget[0] = tu_get_budget_memory(physical_device);

         /* The heapBudget and heapUsage values must be zero for array elements
          * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
          */
         for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            memory_budget_props->heapBudget[i] = 0u;
            memory_budget_props->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         break;
      }
   }
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice,
   uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out,
                          pFragmentShadingRates, pFragmentShadingRateCount);

#define append_rate(w, h, s)                                                        \
   {                                                                                \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                               \
         .sType =                                                                   \
            VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,            \
         .sampleCounts = s,                                                         \
         .fragmentSize = { .width = w, .height = h },                               \
      };                                                                            \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out,        \
                               r) *r = rate;                                        \
   }

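   /* Coarser rates are limited to fewer sample counts; plain 1x1 shading
    * supports all of them.
    */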
   append_rate(4, 4, VK_SAMPLE_COUNT_1_BIT);
   append_rate(4, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT);
   append_rate(2, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(2, 1, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(1, 2, VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT);
   append_rate(1, 1, ~0);

#undef append_rate

   return vk_outarray_status(&out);
}

uint64_t
tu_device_ticks_to_ns(struct tu_device *dev, uint64_t ts)
{
   /* This is based on the 19.2MHz always-on rbbm timer.
    *
    * TODO: we should probably query this value from the kernel.
    */
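   /* Integer division truncates 1000000000 / 19200000 = 52.083... to 52, so
    * the converted values undershoot by ~0.16%.
    */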
   return ts * (1000000000 / 19200000);
}

struct u_trace_context *
tu_device_get_u_trace(struct tu_device *device)
{
   return &device->trace_context;
}

static void *
tu_trace_create_buffer(struct u_trace_context *utctx, uint64_t size_B)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);

   struct tu_bo *bo;
   tu_bo_init_new(device, NULL, &bo, size_B, TU_BO_ALLOC_INTERNAL_RESOURCE, "trace");
   tu_bo_map(device, bo, NULL);

   return bo;
}

static void
tu_trace_destroy_buffer(struct u_trace_context *utctx, void *timestamps)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = (struct tu_bo *) timestamps;

   tu_bo_finish(device, bo);
}

template <chip CHIP>
static void
tu_trace_record_ts(struct u_trace *ut, void *cs, void *timestamps,
                   uint64_t offset_B, uint32_t)
{
   struct tu_bo *bo = (struct tu_bo *) timestamps;
   struct tu_cs *ts_cs = (struct tu_cs *) cs;

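   /* A6XX writes the timestamp via CP_EVENT_WRITE with the TIMESTAMP flag;
    * A7XX uses CP_EVENT_WRITE7 to write the always-on counter to RAM.
    */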
   if (CHIP == A6XX) {
      tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE, 4);
      tu_cs_emit(ts_cs, CP_EVENT_WRITE_0_EVENT(RB_DONE_TS) |
                           CP_EVENT_WRITE_0_TIMESTAMP);
      tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
      tu_cs_emit(ts_cs, 0x00000000);
   } else {
      tu_cs_emit_pkt7(ts_cs, CP_EVENT_WRITE7, 3);
      tu_cs_emit(ts_cs, CP_EVENT_WRITE7_0(.event = RB_DONE_TS,
                                          .write_src = EV_WRITE_ALWAYSON,
                                          .write_dst = EV_DST_RAM,
                                          .write_enabled = true)
                           .value);
      tu_cs_emit_qw(ts_cs, bo->iova + offset_B);
   }
}

static uint64_t
tu_trace_read_ts(struct u_trace_context *utctx,
                 void *timestamps, uint64_t offset_B, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_bo *bo = (struct tu_bo *) timestamps;
   struct tu_u_trace_submission_data *submission_data =
      (struct tu_u_trace_submission_data *) flush_data;

   /* Only need to stall on results for the first entry: */
   if (offset_B == 0) {
      tu_queue_wait_fence(submission_data->queue, submission_data->fence,
                          1000000000);
   }

   if (tu_bo_map(device, bo, NULL) != VK_SUCCESS) {
      return U_TRACE_NO_TIMESTAMP;
   }

   uint64_t *ts = (uint64_t *) ((char *)bo->map + offset_B);

   /* Don't translate the no-timestamp marker: */
   if (*ts == U_TRACE_NO_TIMESTAMP)
      return U_TRACE_NO_TIMESTAMP;

   return tu_device_ticks_to_ns(device, *ts);
}

static void
tu_trace_delete_flush_data(struct u_trace_context *utctx, void *flush_data)
{
   struct tu_device *device =
      container_of(utctx, struct tu_device, trace_context);
   struct tu_u_trace_submission_data *submission_data =
      (struct tu_u_trace_submission_data *) flush_data;

   tu_u_trace_submission_data_finish(device, submission_data);
}

void
tu_copy_buffer(struct u_trace_context *utctx, void *cmdstream,
               void *ts_from, uint64_t from_offset_B,
               void *ts_to, uint64_t to_offset_B,
               uint64_t size_B)
{
   struct tu_cs *cs = (struct tu_cs *) cmdstream;
   struct tu_bo *bo_from = (struct tu_bo *) ts_from;
   struct tu_bo *bo_to = (struct tu_bo *) ts_to;

   tu_cs_emit_pkt7(cs, CP_MEMCPY, 5);
   tu_cs_emit(cs, size_B / sizeof(uint32_t));
   tu_cs_emit_qw(cs, bo_from->iova + from_offset_B);
   tu_cs_emit_qw(cs, bo_to->iova + to_offset_B);
}

static void
tu_trace_capture_data(struct u_trace *ut,
                      void *cs,
                      void *dst_buffer,
                      uint64_t dst_offset_B,
                      void *src_buffer,
                      uint64_t src_offset_B,
                      uint32_t size_B)
{
   if (src_buffer)
      tu_copy_buffer(ut->utctx, cs, src_buffer, src_offset_B, dst_buffer,
                     dst_offset_B, size_B);
}

static const void *
tu_trace_get_data(struct u_trace_context *utctx,
                  void *buffer,
                  uint64_t offset_B,
                  uint32_t size_B)
{
   struct tu_bo *bo = (struct tu_bo *) buffer;
   return (char *) bo->map + offset_B;
}

/* Special helpers instead of u_trace_begin_iterator()/u_trace_end_iterator()
 * that ignore tracepoints at the beginning/end that are part of a
 * suspend/resume chain.
 */
static struct u_trace_iterator
tu_cmd_begin_iterator(struct tu_cmd_buffer *cmdbuf)
{
   switch (cmdbuf->state.suspend_resume) {
   case SR_IN_PRE_CHAIN:
      return cmdbuf->trace_renderpass_end;
   case SR_AFTER_PRE_CHAIN:
   case SR_IN_CHAIN_AFTER_PRE_CHAIN:
      return cmdbuf->pre_chain.trace_renderpass_end;
   default:
      return u_trace_begin_iterator(&cmdbuf->trace);
   }
}

static struct u_trace_iterator
tu_cmd_end_iterator(struct tu_cmd_buffer *cmdbuf)
{
   switch (cmdbuf->state.suspend_resume) {
   case SR_IN_PRE_CHAIN:
      return cmdbuf->trace_renderpass_end;
   case SR_IN_CHAIN:
   case SR_IN_CHAIN_AFTER_PRE_CHAIN:
      return cmdbuf->trace_renderpass_start;
   default:
      return u_trace_end_iterator(&cmdbuf->trace);
   }
}

VkResult
tu_create_copy_timestamp_cs(struct tu_cmd_buffer *cmdbuf, struct tu_cs **cs,
                            struct u_trace **trace_copy)
{
   *cs = (struct tu_cs *) vk_zalloc(&cmdbuf->device->vk.alloc,
                                    sizeof(struct tu_cs), 8,
                                    VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*cs == NULL) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

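   /* Rough sizing: CP_MEMCPY is 6 dwords (header + 5 payload) and may be
    * emitted up to twice per trace chunk, plus 3 dwords for the WFI and
    * WAIT_FOR_ME packets around the copies.
    */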
   tu_cs_init(*cs, cmdbuf->device, TU_CS_MODE_GROW,
              list_length(&cmdbuf->trace.trace_chunks) * 6 * 2 + 3, "trace copy timestamp cs");

   tu_cs_begin(*cs);

   tu_cs_emit_wfi(*cs);
   tu_cs_emit_pkt7(*cs, CP_WAIT_FOR_ME, 0);

   *trace_copy = (struct u_trace *) vk_zalloc(
      &cmdbuf->device->vk.alloc, sizeof(struct u_trace), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*trace_copy == NULL) {
      return VK_ERROR_OUT_OF_HOST_MEMORY;
   }

   u_trace_init(*trace_copy, cmdbuf->trace.utctx);
   u_trace_clone_append(tu_cmd_begin_iterator(cmdbuf),
                        tu_cmd_end_iterator(cmdbuf),
                        *trace_copy, *cs,
                        tu_copy_buffer);

   tu_cs_emit_wfi(*cs);

   tu_cs_end(*cs);

   return VK_SUCCESS;
}

VkResult
tu_u_trace_submission_data_create(
   struct tu_device *device,
   struct tu_cmd_buffer **cmd_buffers,
   uint32_t cmd_buffer_count,
   struct tu_u_trace_submission_data **submission_data)
{
   *submission_data = (struct tu_u_trace_submission_data *)
      vk_zalloc(&device->vk.alloc,
                sizeof(struct tu_u_trace_submission_data), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!(*submission_data)) {
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   struct tu_u_trace_submission_data *data = *submission_data;

   data->cmd_trace_data = (struct tu_u_trace_cmd_data *) vk_zalloc(
      &device->vk.alloc,
      cmd_buffer_count * sizeof(struct tu_u_trace_cmd_data), 8,
      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (!data->cmd_trace_data) {
      goto fail;
   }

   data->cmd_buffer_count = cmd_buffer_count;
   data->last_buffer_with_tracepoints = -1;

   for (uint32_t i = 0; i < cmd_buffer_count; ++i) {
      struct tu_cmd_buffer *cmdbuf = cmd_buffers[i];

      if (!u_trace_has_points(&cmdbuf->trace))
         continue;

      data->last_buffer_with_tracepoints = i;

      if (!(cmdbuf->usage_flags & VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT)) {
         /* A single command buffer could be submitted several times, but we
          * already baked timestamp iova addresses and trace points are
          * single-use. Therefore we have to copy trace points and create a
          * new timestamp buffer on every submit of a reusable command buffer.
          */
         if (tu_create_copy_timestamp_cs(cmdbuf,
               &data->cmd_trace_data[i].timestamp_copy_cs,
               &data->cmd_trace_data[i].trace) != VK_SUCCESS) {
            goto fail;
         }

         assert(data->cmd_trace_data[i].timestamp_copy_cs->entry_count == 1);
      } else {
         data->cmd_trace_data[i].trace = &cmdbuf->trace;
      }
   }

   assert(data->last_buffer_with_tracepoints != -1);

   return VK_SUCCESS;

fail:
   tu_u_trace_submission_data_finish(device, data);
   *submission_data = NULL;

   return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
}

void
tu_u_trace_submission_data_finish(
   struct tu_device *device,
   struct tu_u_trace_submission_data *submission_data)
{
   for (uint32_t i = 0; i < submission_data->cmd_buffer_count; ++i) {
      /* Only if we had to create a copy of the trace should we free it */
      struct tu_u_trace_cmd_data *cmd_data = &submission_data->cmd_trace_data[i];
      if (cmd_data->timestamp_copy_cs) {
         tu_cs_finish(cmd_data->timestamp_copy_cs);
         vk_free(&device->vk.alloc, cmd_data->timestamp_copy_cs);

         u_trace_fini(cmd_data->trace);
         vk_free(&device->vk.alloc, cmd_data->trace);
      }
   }

   if (submission_data->kgsl_timestamp_bo.bo) {
      mtx_lock(&device->kgsl_profiling_mutex);
      tu_suballoc_bo_free(&device->kgsl_profiling_suballoc,
                          &submission_data->kgsl_timestamp_bo);
      mtx_unlock(&device->kgsl_profiling_mutex);
   }

   vk_free(&device->vk.alloc, submission_data->cmd_trace_data);
   vk_free(&device->vk.alloc, submission_data);
}

enum tu_reg_stomper_flags
{
   TU_DEBUG_REG_STOMP_INVERSE = 1 << 0,
   TU_DEBUG_REG_STOMP_CMDBUF = 1 << 1,
   TU_DEBUG_REG_STOMP_RENDERPASS = 1 << 2,
};

/* See freedreno.rst for usage tips */
static const struct debug_named_value tu_reg_stomper_options[] = {
   { "inverse", TU_DEBUG_REG_STOMP_INVERSE,
     "By default the range specifies the regs to stomp, with 'inverse' it "
     "specifies the regs NOT to stomp" },
   { "cmdbuf", TU_DEBUG_REG_STOMP_CMDBUF,
     "Stomp regs at the start of a cmdbuf" },
   { "renderpass", TU_DEBUG_REG_STOMP_RENDERPASS,
     "Stomp regs before a renderpass" },
   { NULL, 0 }
};

template <chip CHIP>
static inline void
tu_cs_dbg_stomp_regs(struct tu_cs *cs,
                     bool is_rp_blit,
                     uint32_t first_reg,
                     uint32_t last_reg,
                     bool inverse)
{
   const uint16_t *regs = NULL;
   size_t count = 0;

   if (is_rp_blit) {
      regs = &RP_BLIT_REGS<CHIP>[0];
      count = ARRAY_SIZE(RP_BLIT_REGS<CHIP>);
   } else {
      regs = &CMD_REGS<CHIP>[0];
      count = ARRAY_SIZE(CMD_REGS<CHIP>);
   }

   for (size_t i = 0; i < count; i++) {
      if (inverse) {
         if (regs[i] >= first_reg && regs[i] <= last_reg)
            continue;
      } else {
         if (regs[i] < first_reg || regs[i] > last_reg)
            continue;
      }

      if (fd_reg_stomp_allowed(CHIP, regs[i]))
         tu_cs_emit_write_reg(cs, regs[i], 0xffffffff);
   }
}

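/* Configured via TU_DEBUG_STALE_REGS_RANGE, a hex "first,last" pair (e.g.
 * TU_DEBUG_STALE_REGS_RANGE=0x8000,0x8fff -- the values here are just an
 * example), and TU_DEBUG_STALE_REGS_FLAGS (cmdbuf, renderpass, inverse).
 */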
static void
tu_init_dbg_reg_stomper(struct tu_device *device)
{
   const char *stale_reg_range_str =
      os_get_option("TU_DEBUG_STALE_REGS_RANGE");
   if (!stale_reg_range_str)
      return;

   uint32_t first_reg, last_reg;

   if (sscanf(stale_reg_range_str, "%x,%x", &first_reg, &last_reg) != 2) {
      mesa_loge("Incorrect TU_DEBUG_STALE_REGS_RANGE");
      return;
   }

   uint64_t debug_flags = debug_get_flags_option("TU_DEBUG_STALE_REGS_FLAGS",
                                                 tu_reg_stomper_options,
                                                 TU_DEBUG_REG_STOMP_CMDBUF);

   bool inverse = debug_flags & TU_DEBUG_REG_STOMP_INVERSE;

   if (debug_flags & TU_DEBUG_REG_STOMP_CMDBUF) {
      struct tu_cs *cmdbuf_cs =
         (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
      tu_cs_init(cmdbuf_cs, device, TU_CS_MODE_GROW, 4096,
                 "cmdbuf reg stomp cs");
      tu_cs_begin(cmdbuf_cs);

      TU_CALLX(device, tu_cs_dbg_stomp_regs)(cmdbuf_cs, false, first_reg, last_reg, inverse);
      tu_cs_end(cmdbuf_cs);
      device->dbg_cmdbuf_stomp_cs = cmdbuf_cs;
   }

   if (debug_flags & TU_DEBUG_REG_STOMP_RENDERPASS) {
      struct tu_cs *rp_cs = (struct tu_cs *) calloc(1, sizeof(struct tu_cs));
      tu_cs_init(rp_cs, device, TU_CS_MODE_GROW, 4096, "rp reg stomp cs");
      tu_cs_begin(rp_cs);

      TU_CALLX(device, tu_cs_dbg_stomp_regs)(rp_cs, true, first_reg, last_reg, inverse);
      tu_cs_end(rp_cs);

      device->dbg_renderpass_stomp_cs = rp_cs;
   }
}

/* It is unknown what this workaround is for and what it fixes. */
static VkResult
tu_init_cmdbuf_start_a725_quirk(struct tu_device *device)
{
   struct tu_cs shader_cs;
   tu_cs_begin_sub_stream(&device->sub_cs, 10, &shader_cs);

   uint32_t raw_shader[] = {
      0x00040000, 0x40600000, // mul.f hr0.x, hr0.x, hr1.x
      0x00050001, 0x40600001, // mul.f hr0.y, hr0.y, hr1.y
      0x00060002, 0x40600002, // mul.f hr0.z, hr0.z, hr1.z
      0x00070003, 0x40600003, // mul.f hr0.w, hr0.w, hr1.w
      0x00000000, 0x03000000, // end
   };

   tu_cs_emit_array(&shader_cs, raw_shader, ARRAY_SIZE(raw_shader));
   struct tu_cs_entry shader_entry = tu_cs_end_sub_stream(&device->sub_cs, &shader_cs);
   uint64_t shader_iova = shader_entry.bo->iova + shader_entry.offset;

   struct tu_cs sub_cs;
   tu_cs_begin_sub_stream(&device->sub_cs, 47, &sub_cs);

   tu_cs_emit_regs(&sub_cs, HLSQ_INVALIDATE_CMD(A7XX,
            .vs_state = true, .hs_state = true, .ds_state = true,
            .gs_state = true, .fs_state = true, .gfx_ibo = true,
            .cs_bindless = 0xff, .gfx_bindless = 0xff));
   tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL(A7XX,
            .constlen = 4,
            .enabled = true));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CONFIG(.enabled = true));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CTRL_REG0(
            .threadmode = MULTI,
            .threadsize = THREAD128,
            .mergedregs = true));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_UNKNOWN_A9B1(.shared_size = 1));
   tu_cs_emit_regs(&sub_cs, HLSQ_CS_KERNEL_GROUP_X(A7XX, 1),
                     HLSQ_CS_KERNEL_GROUP_Y(A7XX, 1),
                     HLSQ_CS_KERNEL_GROUP_Z(A7XX, 1));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_INSTRLEN(.sp_cs_instrlen = 1));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_TEX_COUNT(0));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_IBO_COUNT(0));
   tu_cs_emit_regs(&sub_cs, HLSQ_CS_CNTL_1(A7XX,
            .linearlocalidregid = regid(63, 0),
            .threadsize = THREAD128,
            .workgrouprastorderzfirsten = true,
            .wgtilewidth = 4,
            .wgtileheight = 17));
   tu_cs_emit_regs(&sub_cs, A6XX_SP_CS_CNTL_0(
            .wgidconstid = regid(51, 3),
            .wgsizeconstid = regid(48, 0),
            .wgoffsetconstid = regid(63, 0),
            .localidregid = regid(63, 0)));
   tu_cs_emit_regs(&sub_cs, SP_CS_CNTL_1(A7XX,
            .linearlocalidregid = regid(63, 0),
            .threadsize = THREAD128,
            .workitemrastorder = WORKITEMRASTORDER_TILED));
   tu_cs_emit_regs(&sub_cs, A7XX_SP_CS_UNKNOWN_A9BE(0));

   tu_cs_emit_regs(&sub_cs,
                  HLSQ_CS_NDRANGE_0(A7XX, .kerneldim = 3,
                                          .localsizex = 255,
                                          .localsizey = 1,
                                          .localsizez = 1),
                  HLSQ_CS_NDRANGE_1(A7XX, .globalsize_x = 3072),
                  HLSQ_CS_NDRANGE_2(A7XX, .globaloff_x = 0),
                  HLSQ_CS_NDRANGE_3(A7XX, .globalsize_y = 1),
                  HLSQ_CS_NDRANGE_4(A7XX, .globaloff_y = 0),
                  HLSQ_CS_NDRANGE_5(A7XX, .globalsize_z = 1),
                  HLSQ_CS_NDRANGE_6(A7XX, .globaloff_z = 0));
   tu_cs_emit_regs(&sub_cs, A7XX_HLSQ_CS_LAST_LOCAL_SIZE(
            .localsizex = 255,
            .localsizey = 0,
            .localsizez = 0));
   tu_cs_emit_pkt4(&sub_cs, REG_A6XX_SP_CS_OBJ_FIRST_EXEC_OFFSET, 3);
   tu_cs_emit(&sub_cs, 0);
   tu_cs_emit_qw(&sub_cs, shader_iova);

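   /* Dispatch 12 workgroups of 256 invocations (localsizex is N - 1), which
    * matches the global size of 3072 programmed above.
    */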
2369    tu_cs_emit_pkt7(&sub_cs, CP_EXEC_CS, 4);
2370    tu_cs_emit(&sub_cs, 0x00000000);
2371    tu_cs_emit(&sub_cs, CP_EXEC_CS_1_NGROUPS_X(12));
2372    tu_cs_emit(&sub_cs, CP_EXEC_CS_2_NGROUPS_Y(1));
2373    tu_cs_emit(&sub_cs, CP_EXEC_CS_3_NGROUPS_Z(1));
2374 
2375    device->cmdbuf_start_a725_quirk_entry =
2376       tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2377 
2378    return VK_SUCCESS;
2379 }
2380 
2381 static VkResult
tu_device_get_timestamp(struct vk_device * vk_device,uint64_t * timestamp)2382 tu_device_get_timestamp(struct vk_device *vk_device, uint64_t *timestamp)
2383 {
2384    struct tu_device *dev = container_of(vk_device, struct tu_device, vk);
2385    const int ret = tu_device_get_gpu_timestamp(dev, timestamp);
2386    return ret == 0 ? VK_SUCCESS : VK_ERROR_UNKNOWN;
2387 }
2388 
2389 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateDevice(VkPhysicalDevice physicalDevice,const VkDeviceCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkDevice * pDevice)2390 tu_CreateDevice(VkPhysicalDevice physicalDevice,
2391                 const VkDeviceCreateInfo *pCreateInfo,
2392                 const VkAllocationCallbacks *pAllocator,
2393                 VkDevice *pDevice)
2394 {
2395    VK_FROM_HANDLE(tu_physical_device, physical_device, physicalDevice);
2396    VkResult result;
2397    struct tu_device *device;
2398    bool border_color_without_format = false;
2399 
2400    vk_foreach_struct_const (ext, pCreateInfo->pNext) {
2401       switch (ext->sType) {
2402       case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT:
2403          border_color_without_format =
2404             ((const VkPhysicalDeviceCustomBorderColorFeaturesEXT *) ext)
2405                ->customBorderColorWithoutFormat;
2406          break;
2407       default:
2408          break;
2409       }
2410    }
2411 
2412    device = (struct tu_device *) vk_zalloc2(
2413       &physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
2414       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2415    if (!device)
2416       return vk_startup_errorf(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2417 
2418    struct vk_device_dispatch_table dispatch_table;
2419    bool override_initial_entrypoints = true;
2420 
2421    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV) {
2422       vk_device_dispatch_table_from_entrypoints(
2423          &dispatch_table, &tu_rmv_device_entrypoints, true);
2424       override_initial_entrypoints = false;
2425    }
2426 
2427    vk_device_dispatch_table_from_entrypoints(
2428       &dispatch_table, &tu_device_entrypoints, override_initial_entrypoints);
2429 
2430    switch (fd_dev_gen(&physical_device->dev_id)) {
2431    case 6:
2432       vk_device_dispatch_table_from_entrypoints(
2433          &dispatch_table, &tu_device_entrypoints_a6xx, false);
2434       break;
2435    case 7:
2436       vk_device_dispatch_table_from_entrypoints(
2437          &dispatch_table, &tu_device_entrypoints_a7xx, false);
2438    }
2439 
2440    vk_device_dispatch_table_from_entrypoints(
2441       &dispatch_table, &wsi_device_entrypoints, false);
2442 
2443    const struct vk_device_entrypoint_table *knl_device_entrypoints =
2444          physical_device->instance->knl->device_entrypoints;
2445    if (knl_device_entrypoints) {
2446       vk_device_dispatch_table_from_entrypoints(
2447          &dispatch_table, knl_device_entrypoints, false);
2448    }
2449 
2450    result = vk_device_init(&device->vk, &physical_device->vk,
2451                            &dispatch_table, pCreateInfo, pAllocator);
2452    if (result != VK_SUCCESS) {
2453       vk_free(&device->vk.alloc, device);
2454       return vk_startup_errorf(physical_device->instance, result,
2455                                "vk_device_init failed");
2456    }
2457 
2458    device->instance = physical_device->instance;
2459    device->physical_device = physical_device;
2460    device->device_idx = device->physical_device->device_count++;
2461 
2462    result = tu_drm_device_init(device);
2463    if (result != VK_SUCCESS) {
2464       vk_free(&device->vk.alloc, device);
2465       return result;
2466    }
2467 
2468    device->vk.command_buffer_ops = &tu_cmd_buffer_ops;
2469    device->vk.as_build_ops = &tu_as_build_ops;
2470    device->vk.check_status = tu_device_check_status;
2471    device->vk.get_timestamp = tu_device_get_timestamp;
2472 
2473    mtx_init(&device->bo_mutex, mtx_plain);
2474    mtx_init(&device->pipeline_mutex, mtx_plain);
2475    mtx_init(&device->autotune_mutex, mtx_plain);
2476    mtx_init(&device->kgsl_profiling_mutex, mtx_plain);
2477    u_rwlock_init(&device->dma_bo_lock);
2478    pthread_mutex_init(&device->submit_mutex, NULL);
2479 
2480    if (physical_device->has_set_iova) {
2481       mtx_init(&device->vma_mutex, mtx_plain);
2482       util_vma_heap_init(&device->vma, physical_device->va_start,
2483                          ROUND_DOWN_TO(physical_device->va_size, os_page_size));
2484    }
2485 
2486    if (TU_DEBUG(BOS))
2487       device->bo_sizes = _mesa_hash_table_create(NULL, _mesa_hash_string, _mesa_key_string_equal);
2488 
2489    if (physical_device->instance->vk.trace_mode & VK_TRACE_MODE_RMV)
2490       tu_memory_trace_init(device);
2491 
2492    /* kgsl is not a drm device: */
2493    if (!is_kgsl(physical_device->instance))
2494       vk_device_set_drm_fd(&device->vk, device->fd);
2495 
2496    struct tu6_global *global = NULL;
2497    uint32_t global_size = sizeof(struct tu6_global);
2498    struct vk_pipeline_cache_create_info pcc_info = { };
2499 
2500    for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
2501       const VkDeviceQueueCreateInfo *queue_create =
2502          &pCreateInfo->pQueueCreateInfos[i];
2503       uint32_t qfi = queue_create->queueFamilyIndex;
2504       device->queues[qfi] = (struct tu_queue *) vk_alloc(
2505          &device->vk.alloc,
2506          queue_create->queueCount * sizeof(struct tu_queue), 8,
2507          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2508       if (!device->queues[qfi]) {
2509          result = vk_startup_errorf(physical_device->instance,
2510                                     VK_ERROR_OUT_OF_HOST_MEMORY,
2511                                     "OOM");
2512          goto fail_queues;
2513       }
2514 
2515       memset(device->queues[qfi], 0,
2516              queue_create->queueCount * sizeof(struct tu_queue));
2517 
2518       device->queue_count[qfi] = queue_create->queueCount;
2519 
2520       for (unsigned q = 0; q < queue_create->queueCount; q++) {
2521          result = tu_queue_init(device, &device->queues[qfi][q], q, queue_create);
2522          if (result != VK_SUCCESS) {
2523             device->queue_count[qfi] = q;
2524             goto fail_queues;
2525          }
2526       }
2527    }
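
   /* Turnip currently exposes a single queue family (see the STATIC_ASSERT
    * on TU_MAX_QUEUE_FAMILIES below), so in practice this loop runs once.
    */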
2528 
2529    result = vk_meta_device_init(&device->vk, &device->meta);
2530    if (result != VK_SUCCESS)
2531       goto fail_queues;
2532 
2533    util_sparse_array_init(&device->accel_struct_ranges, sizeof(VkDeviceSize), 256);
2534 
2535    mtx_init(&device->radix_sort_mutex, mtx_plain);
2536 
2537    {
2538       struct ir3_compiler_options ir3_options = {
2539          .push_ubo_with_preamble = true,
2540          .disable_cache = true,
2541          .bindless_fb_read_descriptor = -1,
2542          .bindless_fb_read_slot = -1,
2543          .storage_16bit = physical_device->info->a6xx.storage_16bit,
2544          .storage_8bit = physical_device->info->a7xx.storage_8bit,
2545          .shared_push_consts = !TU_DEBUG(PUSH_CONSTS_PER_STAGE),
2546       };
2547       device->compiler = ir3_compiler_create(
2548          NULL, &physical_device->dev_id, physical_device->info, &ir3_options);
2549    }
2550    if (!device->compiler) {
2551       result = vk_startup_errorf(physical_device->instance,
2552                                  VK_ERROR_INITIALIZATION_FAILED,
2553                                  "failed to initialize ir3 compiler");
2554       goto fail_compiler;
2555    }
2556 
2557    /* Initialize sparse array for refcounting imported BOs */
2558    util_sparse_array_init(&device->bo_map, sizeof(struct tu_bo), 512);
2559 
2560    if (physical_device->has_set_iova) {
2561       STATIC_ASSERT(TU_MAX_QUEUE_FAMILIES == 1);
2562       if (!u_vector_init(&device->zombie_vmas, 64,
2563                          sizeof(struct tu_zombie_vma))) {
2564          result = vk_startup_errorf(physical_device->instance,
2565                                     VK_ERROR_INITIALIZATION_FAILED,
2566                                     "zombie_vmas create failed");
2567          goto fail_free_zombie_vma;
2568       }
2569    }
2570 
2571    /* Initial sizes; these will increase if there is overflow. */
2572    device->vsc_draw_strm_pitch = 0x1000 + VSC_PAD;
2573    device->vsc_prim_strm_pitch = 0x4000 + VSC_PAD;
2574 
2575    if (device->vk.enabled_features.customBorderColors)
2576       global_size += TU_BORDER_COLOR_COUNT * sizeof(struct bcolor_entry);
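
   /* The global BO starts with struct tu6_global; when custom border colors
    * are enabled, TU_BORDER_COLOR_COUNT bcolor_entry records are appended
    * right after it, with free slots tracked by the custom_border_color
    * bitset initialized below.
    */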
2577 
2578    tu_bo_suballocator_init(
2579       &device->pipeline_suballoc, device, 128 * 1024,
2580       (enum tu_bo_alloc_flags) (TU_BO_ALLOC_GPU_READ_ONLY |
2581                                 TU_BO_ALLOC_ALLOW_DUMP |
2582                                 TU_BO_ALLOC_INTERNAL_RESOURCE),
2583       "pipeline_suballoc");
2584    tu_bo_suballocator_init(&device->autotune_suballoc, device,
2585                            128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2586                            "autotune_suballoc");
2587    if (is_kgsl(physical_device->instance)) {
2588       tu_bo_suballocator_init(&device->kgsl_profiling_suballoc, device,
2589                               128 * 1024, TU_BO_ALLOC_INTERNAL_RESOURCE,
2590                               "kgsl_profiling_suballoc");
2591    }
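
   /* The suballocators slice these 128 KiB backing BOs into small pieces so
    * that frequent allocations (pipelines, autotune results, profiling data)
    * don't each pay for a kernel BO allocation.
    */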
2592 
2593    result = tu_bo_init_new(
2594       device, NULL, &device->global_bo, global_size,
2595       (enum tu_bo_alloc_flags) (TU_BO_ALLOC_ALLOW_DUMP |
2596                                 TU_BO_ALLOC_INTERNAL_RESOURCE),
2597       "global");
2598    if (result != VK_SUCCESS) {
2599       vk_startup_errorf(device->instance, result, "BO init");
2600       goto fail_global_bo;
2601    }
2602 
2603    result = tu_bo_map(device, device->global_bo, NULL);
2604    if (result != VK_SUCCESS) {
2605       vk_startup_errorf(device->instance, result, "BO map");
2606       goto fail_global_bo_map;
2607    }
2608 
2609    global = (struct tu6_global *)device->global_bo->map;
2610    device->global_bo_map = global;
2611    tu_init_clear_blit_shaders(device);
2612 
2613    if (device->vk.enabled_features.accelerationStructure &&
2614        device->vk.enabled_features.nullDescriptor) {
2615       result = tu_init_null_accel_struct(device);
2616       if (result != VK_SUCCESS) {
2617          vk_startup_errorf(device->instance, result, "null acceleration struct");
2618          goto fail_null_accel_struct;
2619       }
2620    }
2621 
2622    result = tu_init_empty_shaders(device);
2623    if (result != VK_SUCCESS) {
2624       vk_startup_errorf(device->instance, result, "empty shaders");
2625       goto fail_empty_shaders;
2626    }
2627 
2628    global->predicate = 0;
2629    global->vtx_stats_query_not_running = 1;
2630    global->dbg_one = (uint32_t)-1;
2631    global->dbg_gmem_total_loads = 0;
2632    global->dbg_gmem_taken_loads = 0;
2633    global->dbg_gmem_total_stores = 0;
2634    global->dbg_gmem_taken_stores = 0;
2635    for (int i = 0; i < TU_BORDER_COLOR_BUILTIN; i++) {
2636       VkClearColorValue border_color = vk_border_color_value((VkBorderColor) i);
2637       tu6_pack_border_color(&global->bcolor_builtin[i], &border_color,
2638                             vk_border_color_is_int((VkBorderColor) i));
2639    }
2640 
2641    /* initialize to ones so ffs can be used to find unused slots */
2642    BITSET_ONES(device->custom_border_color);
2643 
2644    result = tu_init_dynamic_rendering(device);
2645    if (result != VK_SUCCESS) {
2646       vk_startup_errorf(device->instance, result, "dynamic rendering");
2647       goto fail_dynamic_rendering;
2648    }
2649 
2650    device->mem_cache = vk_pipeline_cache_create(&device->vk, &pcc_info,
2651                                                 NULL);
2652    if (!device->mem_cache) {
2653       result = VK_ERROR_OUT_OF_HOST_MEMORY;
2654       vk_startup_errorf(device->instance, result, "create pipeline cache failed");
2655       goto fail_pipeline_cache;
2656    }
2657 
2658    tu_cs_init(&device->sub_cs, device, TU_CS_MODE_SUB_STREAM, 1024, "device sub cs");
2659 
2660    if (device->vk.enabled_features.performanceCounterQueryPools) {
2661       /* Prepare 32 command streams, one per pass index, each setting
2662        * PERF_CNTRS_REG to 1 << i. The matching stream is picked up at
2663        * command submit time when the perf query for that pass executes.
2664        */
2665 
2666       device->perfcntrs_pass_cs_entries =
2667          (struct tu_cs_entry *) calloc(32, sizeof(struct tu_cs_entry));
2668       if (!device->perfcntrs_pass_cs_entries) {
2669          result = vk_startup_errorf(device->instance,
2670                VK_ERROR_OUT_OF_HOST_MEMORY, "OOM");
2671          goto fail_perfcntrs_pass_entries_alloc;
2672       }
2673 
2674       for (unsigned i = 0; i < 32; i++) {
2675          struct tu_cs sub_cs;
2676 
2677          result = tu_cs_begin_sub_stream(&device->sub_cs, 3, &sub_cs);
2678          if (result != VK_SUCCESS) {
2679             vk_startup_errorf(device->instance, result,
2680                   "failed to allocate commands streams");
2681             goto fail_prepare_perfcntrs_pass_cs;
2682          }
2683 
2684          tu_cs_emit_regs(&sub_cs, A6XX_CP_SCRATCH_REG(PERF_CNTRS_REG, 1 << i));
2685          tu_cs_emit_pkt7(&sub_cs, CP_WAIT_FOR_ME, 0);
2686 
2687          device->perfcntrs_pass_cs_entries[i] =
2688             tu_cs_end_sub_stream(&device->sub_cs, &sub_cs);
2689       }
2690    }
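
   /* A sketch of how a consumer uses these entries at submit time
    * (hypothetical caller, for illustration only):
    *
    *    const struct tu_cs_entry *entry =
    *       &device->perfcntrs_pass_cs_entries[pass_index];
    *
    * Executing that entry sets the CP scratch register so only the counters
    * belonging to the selected pass are collected.
    */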
2691 
2692    result = tu_init_bin_preamble(device);
2693    if (result != VK_SUCCESS)
2694       goto fail_bin_preamble;
2695 
2696    if (physical_device->info->a7xx.cmdbuf_start_a725_quirk) {
2697       result = tu_init_cmdbuf_start_a725_quirk(device);
2698       if (result != VK_SUCCESS)
2699          goto fail_a725_workaround;
2700    }
2701 
2702    tu_init_dbg_reg_stomper(device);
2703 
2704    /* Initialize a condition variable for timeline semaphore */
2705    pthread_condattr_t condattr;
2706    if (pthread_condattr_init(&condattr) != 0) {
2707       result = vk_startup_errorf(physical_device->instance,
2708                                  VK_ERROR_INITIALIZATION_FAILED,
2709                                  "pthread condattr init");
2710       goto fail_timeline_cond;
2711    }
2712    if (pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC) != 0) {
2713       pthread_condattr_destroy(&condattr);
2714       result = vk_startup_errorf(physical_device->instance,
2715                                  VK_ERROR_INITIALIZATION_FAILED,
2716                                  "pthread condattr clock setup");
2717       goto fail_timeline_cond;
2718    }
2719    if (pthread_cond_init(&device->timeline_cond, &condattr) != 0) {
2720       pthread_condattr_destroy(&condattr);
2721       result = vk_startup_errorf(physical_device->instance,
2722                                  VK_ERROR_INITIALIZATION_FAILED,
2723                                  "pthread cond init");
2724       goto fail_timeline_cond;
2725    }
2726    pthread_condattr_destroy(&condattr);
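
   /* Using CLOCK_MONOTONIC here is deliberate: timeline-semaphore waits
    * compute absolute deadlines against the monotonic clock, and
    * pthread_cond_timedwait() must interpret them against the same clock,
    * or waits could time out too early or far too late.
    */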
2727 
2728    result = tu_autotune_init(&device->autotune, device);
2729    if (result != VK_SUCCESS) {
2730       goto fail_timeline_cond;
2731    }
2732 
2733    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++)
2734       mtx_init(&device->scratch_bos[i].construct_mtx, mtx_plain);
2735 
2736    mtx_init(&device->fiber_pvtmem_bo.mtx, mtx_plain);
2737    mtx_init(&device->wave_pvtmem_bo.mtx, mtx_plain);
2738 
2739    mtx_init(&device->mutex, mtx_plain);
2740 
2741    device->use_z24uint_s8uint =
2742       physical_device->info->a6xx.has_z24uint_s8uint &&
2743       (!border_color_without_format ||
2744        physical_device->instance->disable_d24s8_border_color_workaround);
2745    device->use_lrz = !TU_DEBUG_ENV(NOLRZ);
2746 
2747    tu_gpu_tracepoint_config_variable();
2748 
2749    device->submit_count = 0;
2750    u_trace_context_init(&device->trace_context, device,
2751                         sizeof(uint64_t),
2752                         12,
2753                         tu_trace_create_buffer,
2754                         tu_trace_destroy_buffer,
2755                         TU_CALLX(device, tu_trace_record_ts),
2756                         tu_trace_read_ts,
2757                         tu_trace_capture_data,
2758                         tu_trace_get_data,
2759                         tu_trace_delete_flush_data);
2760 
2761    tu_breadcrumbs_init(device);
2762 
2763    if (FD_RD_DUMP(ENABLE)) {
2764       struct vk_app_info *app_info = &device->instance->vk.app_info;
2765       const char *app_name_str = app_info->app_name ?
2766          app_info->app_name : util_get_process_name();
2767       const char *engine_name_str = app_info->engine_name ?
2768          app_info->engine_name : "unknown-engine";
2769 
2770       char app_name[64];
2771       snprintf(app_name, sizeof(app_name), "%s", app_name_str);
2772 
2773       char engine_name[32];
2774       snprintf(engine_name, sizeof(engine_name), "%s", engine_name_str);
2775 
2776       char output_name[128];
2777       snprintf(output_name, sizeof(output_name), "tu_%s.%s_instance%u_device%u",
2778                app_name, engine_name, device->instance->instance_idx,
2779                device->device_idx);
2780 
2781       fd_rd_output_init(&device->rd_output, output_name);
2782    }
2783 
2784    device->vk.cmd_dispatch_unaligned = tu_dispatch_unaligned;
2785    device->vk.write_buffer_cp = tu_write_buffer_cp;
2786    device->vk.flush_buffer_write_cp = tu_flush_buffer_write_cp;
2787    device->vk.cmd_fill_buffer_addr = tu_cmd_fill_buffer_addr;
2788 
2789    *pDevice = tu_device_to_handle(device);
2790    return VK_SUCCESS;
2791 
2792 fail_timeline_cond:
2793 fail_a725_workaround:
2794 fail_bin_preamble:
2795 fail_prepare_perfcntrs_pass_cs:
2796    free(device->perfcntrs_pass_cs_entries);
2797 fail_perfcntrs_pass_entries_alloc:
2798    tu_cs_finish(&device->sub_cs);
2799    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2800 fail_pipeline_cache:
2801    tu_destroy_dynamic_rendering(device);
2802 fail_dynamic_rendering:
2803    tu_destroy_empty_shaders(device);
2804 fail_empty_shaders:
2805    if (device->null_accel_struct_bo)
2806       tu_bo_finish(device, device->null_accel_struct_bo);
2807 fail_null_accel_struct:
2808    tu_destroy_clear_blit_shaders(device);
2809 fail_global_bo_map:
2810    TU_RMV(resource_destroy, device, device->global_bo);
2811    tu_bo_finish(device, device->global_bo);
2812    vk_free(&device->vk.alloc, device->submit_bo_list);
2813    util_dynarray_fini(&device->dump_bo_list);
2814 fail_global_bo:
2815    if (physical_device->has_set_iova)
2816       util_vma_heap_finish(&device->vma);
2817 fail_free_zombie_vma:
2818    util_sparse_array_finish(&device->bo_map);
2819    u_vector_finish(&device->zombie_vmas);
2820    ir3_compiler_destroy(device->compiler);
2821 fail_compiler:
2822    util_sparse_array_finish(&device->accel_struct_ranges);
2823    vk_meta_device_finish(&device->vk, &device->meta);
2824 fail_queues:
2825    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2826       for (unsigned q = 0; q < device->queue_count[i]; q++)
2827          tu_queue_finish(&device->queues[i][q]);
2828       if (device->queues[i])
2829          vk_free(&device->vk.alloc, device->queues[i]);
2830    }
2831 
2832    u_rwlock_destroy(&device->dma_bo_lock);
2833    tu_drm_device_finish(device);
2834    vk_device_finish(&device->vk);
2835    vk_free(&device->vk.alloc, device);
2836    return result;
2837 }
2838 
2839 VKAPI_ATTR void VKAPI_CALL
2840 tu_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
2841 {
2842    VK_FROM_HANDLE(tu_device, device, _device);
2843 
2844    if (!device)
2845       return;
2846 
2847    tu_memory_trace_finish(device);
2848 
2849    if (FD_RD_DUMP(ENABLE))
2850       fd_rd_output_fini(&device->rd_output);
2851 
2852    tu_breadcrumbs_finish(device);
2853 
2854    u_trace_context_fini(&device->trace_context);
2855 
2856    for (unsigned i = 0; i < ARRAY_SIZE(device->scratch_bos); i++) {
2857       if (device->scratch_bos[i].initialized)
2858          tu_bo_finish(device, device->scratch_bos[i].bo);
2859    }
2860 
2861    if (device->fiber_pvtmem_bo.bo)
2862       tu_bo_finish(device, device->fiber_pvtmem_bo.bo);
2863 
2864    if (device->wave_pvtmem_bo.bo)
2865       tu_bo_finish(device, device->wave_pvtmem_bo.bo);
2866 
2867    tu_destroy_clear_blit_shaders(device);
2868 
2869    tu_destroy_empty_shaders(device);
2870 
2871    tu_destroy_dynamic_rendering(device);
2872 
2873    vk_meta_device_finish(&device->vk, &device->meta);
2874 
2875    util_sparse_array_finish(&device->accel_struct_ranges);
2876 
2877    ir3_compiler_destroy(device->compiler);
2878 
2879    vk_pipeline_cache_destroy(device->mem_cache, &device->vk.alloc);
2880 
2881    tu_cs_finish(&device->sub_cs);
2882 
2883    if (device->perfcntrs_pass_cs_entries) {
2884       free(device->perfcntrs_pass_cs_entries);
2885    }
2886 
2887    if (device->dbg_cmdbuf_stomp_cs) {
2888       tu_cs_finish(device->dbg_cmdbuf_stomp_cs);
2889       free(device->dbg_cmdbuf_stomp_cs);
2890    }
2891 
2892    if (device->dbg_renderpass_stomp_cs) {
2893       tu_cs_finish(device->dbg_renderpass_stomp_cs);
2894       free(device->dbg_renderpass_stomp_cs);
2895    }
2896 
2897    tu_autotune_fini(&device->autotune, device);
2898 
2899    tu_bo_suballocator_finish(&device->pipeline_suballoc);
2900    tu_bo_suballocator_finish(&device->autotune_suballoc);
2901    tu_bo_suballocator_finish(&device->kgsl_profiling_suballoc);
2902 
2903    tu_bo_finish(device, device->global_bo);
2904 
2905    if (device->null_accel_struct_bo)
2906       tu_bo_finish(device, device->null_accel_struct_bo);
2907 
2908    for (unsigned i = 0; i < TU_MAX_QUEUE_FAMILIES; i++) {
2909       for (unsigned q = 0; q < device->queue_count[i]; q++)
2910          tu_queue_finish(&device->queues[i][q]);
2911       if (device->queue_count[i])
2912          vk_free(&device->vk.alloc, device->queues[i]);
2913    }
2914 
2915    tu_drm_device_finish(device);
2916 
2917    if (device->physical_device->has_set_iova)
2918       util_vma_heap_finish(&device->vma);
2919 
2920    util_sparse_array_finish(&device->bo_map);
2921    u_rwlock_destroy(&device->dma_bo_lock);
2922 
2923    u_vector_finish(&device->zombie_vmas);
2924 
2925    pthread_cond_destroy(&device->timeline_cond);
2926    _mesa_hash_table_destroy(device->bo_sizes, NULL);
2927    vk_free(&device->vk.alloc, device->submit_bo_list);
2928    util_dynarray_fini(&device->dump_bo_list);
2929    vk_device_finish(&device->vk);
2930    vk_free(&device->vk.alloc, device);
2931 }
2932 
2933 VkResult
2934 tu_get_scratch_bo(struct tu_device *dev, uint64_t size, struct tu_bo **bo)
2935 {
2936    unsigned size_log2 = MAX2(util_logbase2_ceil64(size), MIN_SCRATCH_BO_SIZE_LOG2);
2937    unsigned index = size_log2 - MIN_SCRATCH_BO_SIZE_LOG2;
2938    assert(index < ARRAY_SIZE(dev->scratch_bos));
2939 
2940    for (unsigned i = index; i < ARRAY_SIZE(dev->scratch_bos); i++) {
2941       if (p_atomic_read(&dev->scratch_bos[i].initialized)) {
2942          /* Fast path: just return the already-allocated BO. */
2943          *bo = dev->scratch_bos[i].bo;
2944          return VK_SUCCESS;
2945       }
2946    }
2947 
2948    /* Slow path: actually allocate the BO. We take a per-size lock because
2949     * allocation is slow, and we don't want one size class to block
2950     * allocations of the other sizes while it finishes.
2951     */
2952    mtx_lock(&dev->scratch_bos[index].construct_mtx);
2953 
2954    /* Another thread may have allocated it already while we were waiting on
2955     * the lock. We need to check this in order to avoid double-allocating.
2956     */
2957    if (dev->scratch_bos[index].initialized) {
2958       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2959       *bo = dev->scratch_bos[index].bo;
2960       return VK_SUCCESS;
2961    }
2962 
2963    unsigned bo_size = 1ull << size_log2;
2964    VkResult result = tu_bo_init_new(dev, NULL, &dev->scratch_bos[index].bo, bo_size,
2965                                     TU_BO_ALLOC_INTERNAL_RESOURCE, "scratch");
2966    if (result != VK_SUCCESS) {
2967       mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2968       return result;
2969    }
2970 
2971    p_atomic_set(&dev->scratch_bos[index].initialized, true);
2972 
2973    mtx_unlock(&dev->scratch_bos[index].construct_mtx);
2974 
2975    *bo = dev->scratch_bos[index].bo;
2976    return VK_SUCCESS;
2977 }
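
/* Scratch BOs live until the device is destroyed, so a caller may cache the
 * returned pointer. A hypothetical caller, for illustration only:
 *
 *    struct tu_bo *scratch;
 *    VkResult result = tu_get_scratch_bo(dev, 64 * 1024, &scratch);
 *    if (result != VK_SUCCESS)
 *       return result;
 */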
2978 
2979 VKAPI_ATTR VkResult VKAPI_CALL
2980 tu_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2981                                     VkLayerProperties *pProperties)
2982 {
2983    *pPropertyCount = 0;
2984    return VK_SUCCESS;
2985 }
2986 
2987 VKAPI_ATTR VkResult VKAPI_CALL
2988 tu_EnumerateInstanceExtensionProperties(const char *pLayerName,
2989                                         uint32_t *pPropertyCount,
2990                                         VkExtensionProperties *pProperties)
2991 {
2992    if (pLayerName)
2993       return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2994 
2995    return vk_enumerate_instance_extension_properties(
2996       &tu_instance_extensions_supported, pPropertyCount, pProperties);
2997 }
2998 
2999 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
3000 tu_GetInstanceProcAddr(VkInstance _instance, const char *pName)
3001 {
3002    VK_FROM_HANDLE(tu_instance, instance, _instance);
3003    return vk_instance_get_proc_addr(instance != NULL ? &instance->vk : NULL,
3004                                     &tu_instance_entrypoints,
3005                                     pName);
3006 }
3007 
3008 /* The loader wants us to expose a second GetInstanceProcAddr function
3009  * to work around certain LD_PRELOAD issues seen in apps.
3010  */
3011 PUBLIC
3012 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
3013 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
3014 {
3015    return tu_GetInstanceProcAddr(instance, pName);
3016 }
3017 
3018 VKAPI_ATTR VkResult VKAPI_CALL
3019 tu_AllocateMemory(VkDevice _device,
3020                   const VkMemoryAllocateInfo *pAllocateInfo,
3021                   const VkAllocationCallbacks *pAllocator,
3022                   VkDeviceMemory *pMem)
3023 {
3024    VK_FROM_HANDLE(tu_device, device, _device);
3025    struct tu_device_memory *mem;
3026    VkResult result;
3027 
3028    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
3029 
3030    struct tu_memory_heap *mem_heap = &device->physical_device->heap;
3031    uint64_t mem_heap_used = p_atomic_read(&mem_heap->used);
3032    if (mem_heap_used > mem_heap->size)
3033       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3034 
3035    mem = (struct tu_device_memory *) vk_device_memory_create(
3036       &device->vk, pAllocateInfo, pAllocator, sizeof(*mem));
3037    if (mem == NULL)
3038       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3039 
3040    if (pAllocateInfo->allocationSize == 0 && !mem->vk.ahardware_buffer) {
3041       vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3042       /* Apparently, this is allowed */
3043       *pMem = VK_NULL_HANDLE;
3044       return VK_SUCCESS;
3045    }
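
   /* A zero allocationSize is normally rejected by the spec, but an
    * AHardwareBuffer import takes its size from the buffer itself; for the
    * remaining zero-size case the early-out above hands back a null handle,
    * which applications apparently rely on.
    */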
3046 
3047    const VkImportMemoryFdInfoKHR *fd_info =
3048       vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
3049 
3050    if (fd_info && fd_info->handleType) {
3051       assert(fd_info->handleType ==
3052                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3053              fd_info->handleType ==
3054                 VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3055 
3056       /*
3057        * TODO Importing the same fd twice gives us the same handle without
3058        * reference counting.  We need to maintain a per-instance handle-to-bo
3059        * table and add a reference count to tu_bo.
3060        */
3061       result = tu_bo_init_dmabuf(device, &mem->bo,
3062                                  pAllocateInfo->allocationSize, fd_info->fd);
3063       if (result == VK_SUCCESS) {
3064          /* take ownership and close the fd */
3065          close(fd_info->fd);
3066       }
3067    } else if (mem->vk.ahardware_buffer) {
3068 #if DETECT_OS_ANDROID
3069       const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
3070       assert(handle->numFds > 0);
3071       size_t size = lseek(handle->data[0], 0, SEEK_END);
3072       result = tu_bo_init_dmabuf(device, &mem->bo, size, handle->data[0]);
3073 #else
3074       result = VK_ERROR_FEATURE_NOT_PRESENT;
3075 #endif
3076    } else {
3077       uint64_t client_address = 0;
3078       BITMASK_ENUM(tu_bo_alloc_flags) alloc_flags = TU_BO_ALLOC_NO_FLAGS;
3079 
3080       const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
3081          vk_find_struct_const(pAllocateInfo->pNext,
3082                               MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
3083       if (replay_info && replay_info->opaqueCaptureAddress) {
3084          client_address = replay_info->opaqueCaptureAddress;
3085          alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
3086       }
3087 
3088       const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(
3089          pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
3090       if (flags_info &&
3091           (flags_info->flags &
3092            VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)) {
3093          alloc_flags |= TU_BO_ALLOC_REPLAYABLE;
3094       }
3095 
3096       const VkExportMemoryAllocateInfo *export_info =
3097          vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
3098       if (export_info && (export_info->handleTypes &
3099                           (VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT |
3100                            VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT)))
3101          alloc_flags |= TU_BO_ALLOC_SHAREABLE;
3102 
3103 
3104       char name[64] = "vkAllocateMemory()";
3105       if (device->bo_sizes)
3106          snprintf(name, ARRAY_SIZE(name), "vkAllocateMemory(%ldkb)",
3107                   (long)DIV_ROUND_UP(pAllocateInfo->allocationSize, 1024));
3108       VkMemoryPropertyFlags mem_property =
3109          device->physical_device->memory.types[pAllocateInfo->memoryTypeIndex];
3110       result = tu_bo_init_new_explicit_iova(
3111          device, &mem->vk.base, &mem->bo, pAllocateInfo->allocationSize,
3112          client_address, mem_property, alloc_flags, name);
3113    }
3114 
3115    if (result == VK_SUCCESS) {
3116       mem_heap_used = p_atomic_add_return(&mem_heap->used, mem->bo->size);
3117       if (mem_heap_used > mem_heap->size) {
3118          p_atomic_add(&mem_heap->used, -mem->bo->size);
3119          tu_bo_finish(device, mem->bo);
3120          result = vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
3121                             "Out of heap memory");
3122       }
3123    }
3124 
3125    if (result != VK_SUCCESS) {
3126       vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3127       return result;
3128    }
3129 
3130    /* Track in the device whether our BO list contains any implicit-sync BOs, so
3131     * we can suppress implicit sync on non-WSI usage.
3132     */
3133    const struct wsi_memory_allocate_info *wsi_info =
3134       vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
3135    if (wsi_info && wsi_info->implicit_sync) {
3136       mtx_lock(&device->bo_mutex);
3137       if (!mem->bo->implicit_sync) {
3138          mem->bo->implicit_sync = true;
3139          device->implicit_sync_bo_count++;
3140       }
3141       mtx_unlock(&device->bo_mutex);
3142    }
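
   /* The implicit_sync_bo_count bookkeeping lets the submit path skip the
    * kernel's implicit fencing entirely while no WSI buffers are resident.
    */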
3143 
3144    const VkMemoryDedicatedAllocateInfo *dedicate_info =
3145       vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
3146    if (dedicate_info) {
3147       mem->image = tu_image_from_handle(dedicate_info->image);
3148    } else {
3149       mem->image = NULL;
3150    }
3151 
3152    TU_RMV(heap_create, device, pAllocateInfo, mem);
3153 
3154    *pMem = tu_device_memory_to_handle(mem);
3155 
3156    return VK_SUCCESS;
3157 }
3158 
3159 VKAPI_ATTR void VKAPI_CALL
3160 tu_FreeMemory(VkDevice _device,
3161               VkDeviceMemory _mem,
3162               const VkAllocationCallbacks *pAllocator)
3163 {
3164    VK_FROM_HANDLE(tu_device, device, _device);
3165    VK_FROM_HANDLE(tu_device_memory, mem, _mem);
3166 
3167    if (mem == NULL)
3168       return;
3169 
3170    TU_RMV(resource_destroy, device, mem);
3171 
3172    p_atomic_add(&device->physical_device->heap.used, -mem->bo->size);
3173    tu_bo_finish(device, mem->bo);
3174    vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
3175 }
3176 
3177 VKAPI_ATTR VkResult VKAPI_CALL
3178 tu_MapMemory2KHR(VkDevice _device, const VkMemoryMapInfoKHR *pMemoryMapInfo, void **ppData)
3179 {
3180    VK_FROM_HANDLE(tu_device, device, _device);
3181    VK_FROM_HANDLE(tu_device_memory, mem, pMemoryMapInfo->memory);
3182    VkResult result;
3183 
3184    if (mem == NULL) {
3185       *ppData = NULL;
3186       return VK_SUCCESS;
3187    }
3188 
3189    void *placed_addr = NULL;
3190    if (pMemoryMapInfo->flags & VK_MEMORY_MAP_PLACED_BIT_EXT) {
3191       const VkMemoryMapPlacedInfoEXT *placed_info =
3192          vk_find_struct_const(pMemoryMapInfo->pNext, MEMORY_MAP_PLACED_INFO_EXT);
3193       assert(placed_info != NULL);
3194       placed_addr = placed_info->pPlacedAddress;
3195    }
3196 
3197    result = tu_bo_map(device, mem->bo, placed_addr);
3198    if (result != VK_SUCCESS)
3199       return result;
3200 
3201    *ppData = (char *) mem->bo->map + pMemoryMapInfo->offset;
3202    return VK_SUCCESS;
3203 }
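
/* With VK_EXT_map_memory_placed, placed_addr is the exact address the
 * application reserved and tu_bo_map() is expected to map there; otherwise
 * placed_addr is NULL and the kernel chooses the address.
 */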
3204 
3205 VKAPI_ATTR VkResult VKAPI_CALL
3206 tu_UnmapMemory2KHR(VkDevice _device, const VkMemoryUnmapInfoKHR *pMemoryUnmapInfo)
3207 {
3208    VK_FROM_HANDLE(tu_device, device, _device);
3209    VK_FROM_HANDLE(tu_device_memory, mem, pMemoryUnmapInfo->memory);
3210 
3211    if (mem == NULL)
3212       return VK_SUCCESS;
3213 
3214    return tu_bo_unmap(device, mem->bo, pMemoryUnmapInfo->flags & VK_MEMORY_UNMAP_RESERVE_BIT_EXT);
3215 }

3216 static VkResult
3217 sync_cache(VkDevice _device,
3218            enum tu_mem_sync_op op,
3219            uint32_t count,
3220            const VkMappedMemoryRange *ranges)
3221 {
3222    VK_FROM_HANDLE(tu_device, device, _device);
3223 
3224    if (!device->physical_device->has_cached_non_coherent_memory) {
3225       tu_finishme(
3226          "data cache clean and invalidation are unsupported on this arch!");
3227       return VK_SUCCESS;
3228    }
3229 
3230    for (uint32_t i = 0; i < count; i++) {
3231       VK_FROM_HANDLE(tu_device_memory, mem, ranges[i].memory);
3232       tu_bo_sync_cache(device, mem->bo, ranges[i].offset, ranges[i].size, op);
3233    }
3234 
3235    return VK_SUCCESS;
3236 }
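
/* Flush cleans CPU caches so the GPU observes CPU writes
 * (TU_MEM_SYNC_CACHE_TO_GPU); invalidate discards stale CPU cache lines so
 * the CPU observes GPU writes (TU_MEM_SYNC_CACHE_FROM_GPU). Both are no-ops
 * on devices without cached non-coherent memory, per the early-out above.
 */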
3237 
3238 VkResult
3239 tu_FlushMappedMemoryRanges(VkDevice _device,
3240                            uint32_t memoryRangeCount,
3241                            const VkMappedMemoryRange *pMemoryRanges)
3242 {
3243    return sync_cache(_device, TU_MEM_SYNC_CACHE_TO_GPU, memoryRangeCount,
3244                      pMemoryRanges);
3245 }
3246 
3247 VkResult
3248 tu_InvalidateMappedMemoryRanges(VkDevice _device,
3249                                 uint32_t memoryRangeCount,
3250                                 const VkMappedMemoryRange *pMemoryRanges)
3251 {
3252    return sync_cache(_device, TU_MEM_SYNC_CACHE_FROM_GPU, memoryRangeCount,
3253                      pMemoryRanges);
3254 }
3255 
3256 VKAPI_ATTR void VKAPI_CALL
3257 tu_GetDeviceMemoryCommitment(VkDevice device,
3258                              VkDeviceMemory memory,
3259                              VkDeviceSize *pCommittedMemoryInBytes)
3260 {
3261    *pCommittedMemoryInBytes = 0;
3262 }
3263 
3264 VKAPI_ATTR VkResult VKAPI_CALL
3265 tu_CreateFramebuffer(VkDevice _device,
3266                      const VkFramebufferCreateInfo *pCreateInfo,
3267                      const VkAllocationCallbacks *pAllocator,
3268                      VkFramebuffer *pFramebuffer)
3269 {
3270    VK_FROM_HANDLE(tu_device, device, _device);
3271 
3272    if (TU_DEBUG(DYNAMIC))
3273       return vk_common_CreateFramebuffer(_device, pCreateInfo, pAllocator,
3274                                          pFramebuffer);
3275 
3276    VK_FROM_HANDLE(tu_render_pass, pass, pCreateInfo->renderPass);
3277    struct tu_framebuffer *framebuffer;
3278 
3279    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3280 
3281    bool imageless = pCreateInfo->flags & VK_FRAMEBUFFER_CREATE_IMAGELESS_BIT;
3282 
3283    size_t size = sizeof(*framebuffer);
3284    if (!imageless)
3285       size += sizeof(struct tu_attachment_info) * pCreateInfo->attachmentCount;
3286    framebuffer = (struct tu_framebuffer *) vk_object_alloc(
3287       &device->vk, pAllocator, size, VK_OBJECT_TYPE_FRAMEBUFFER);
3288    if (framebuffer == NULL)
3289       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3290 
3291    framebuffer->attachment_count = pCreateInfo->attachmentCount;
3292    framebuffer->width = pCreateInfo->width;
3293    framebuffer->height = pCreateInfo->height;
3294    framebuffer->layers = pCreateInfo->layers;
3295 
3296    if (!imageless) {
3297       for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
3298          VkImageView _iview = pCreateInfo->pAttachments[i];
3299          struct tu_image_view *iview = tu_image_view_from_handle(_iview);
3300          framebuffer->attachments[i].attachment = iview;
3301       }
3302    }
3303 
3304    tu_framebuffer_tiling_config(framebuffer, device, pass);
3305 
3306    *pFramebuffer = tu_framebuffer_to_handle(framebuffer);
3307    return VK_SUCCESS;
3308 }
3309 
3310 void
3311 tu_setup_dynamic_framebuffer(struct tu_cmd_buffer *cmd_buffer,
3312                              const VkRenderingInfo *pRenderingInfo)
3313 {
3314    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
3315    struct tu_framebuffer *framebuffer = &cmd_buffer->dynamic_framebuffer;
3316 
3317    framebuffer->attachment_count = pass->attachment_count;
3318    framebuffer->width = pRenderingInfo->renderArea.offset.x +
3319       pRenderingInfo->renderArea.extent.width;
3320    framebuffer->height = pRenderingInfo->renderArea.offset.y +
3321       pRenderingInfo->renderArea.extent.height;
3322    framebuffer->layers = pRenderingInfo->layerCount;
3323 
3324    tu_framebuffer_tiling_config(framebuffer, cmd_buffer->device, pass);
3325 }
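
/* Note that width/height above are offset + extent rather than just the
 * extent: the tiling configuration is computed from the framebuffer origin,
 * so the render area's offset has to be covered too.
 */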
3326 
3327 VKAPI_ATTR void VKAPI_CALL
3328 tu_DestroyFramebuffer(VkDevice _device,
3329                       VkFramebuffer _fb,
3330                       const VkAllocationCallbacks *pAllocator)
3331 {
3332    VK_FROM_HANDLE(tu_device, device, _device);
3333 
3334    if (TU_DEBUG(DYNAMIC)) {
3335       vk_common_DestroyFramebuffer(_device, _fb, pAllocator);
3336       return;
3337    }
3338 
3339    VK_FROM_HANDLE(tu_framebuffer, fb, _fb);
3340 
3341    if (!fb)
3342       return;
3343 
3344    vk_object_free(&device->vk, pAllocator, fb);
3345 }
3346 
3347 VKAPI_ATTR VkResult VKAPI_CALL
3348 tu_GetMemoryFdKHR(VkDevice _device,
3349                   const VkMemoryGetFdInfoKHR *pGetFdInfo,
3350                   int *pFd)
3351 {
3352    VK_FROM_HANDLE(tu_device, device, _device);
3353    VK_FROM_HANDLE(tu_device_memory, memory, pGetFdInfo->memory);
3354 
3355    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
3356 
3357    /* At the moment, we support only the below handle types. */
3358    assert(pGetFdInfo->handleType ==
3359              VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
3360           pGetFdInfo->handleType ==
3361              VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3362 
3363    int prime_fd = tu_bo_export_dmabuf(device, memory->bo);
3364    if (prime_fd < 0)
3365       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
3366 
3367    *pFd = prime_fd;
3368 
3369    if (memory->image) {
3370       struct fdl_layout *l = &memory->image->layout[0];
3371       uint64_t modifier;
3372       if (l->ubwc) {
3373          modifier = DRM_FORMAT_MOD_QCOM_COMPRESSED;
3374       } else if (l->tile_mode == 2) {
3375          modifier = DRM_FORMAT_MOD_QCOM_TILED2;
3376       } else if (l->tile_mode == 3) {
3377          modifier = DRM_FORMAT_MOD_QCOM_TILED3;
3378       } else {
3379          assert(!l->tile_mode);
3380          modifier = DRM_FORMAT_MOD_LINEAR;
3381       }
3382       struct fdl_metadata metadata = {
3383          .modifier = modifier,
3384       };
3385       tu_bo_set_metadata(device, memory->bo, &metadata, sizeof(metadata));
3386    }
3387 
3388    return VK_SUCCESS;
3389 }
3390 
3391 VKAPI_ATTR VkResult VKAPI_CALL
3392 tu_GetMemoryFdPropertiesKHR(VkDevice _device,
3393                             VkExternalMemoryHandleTypeFlagBits handleType,
3394                             int fd,
3395                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
3396 {
3397    VK_FROM_HANDLE(tu_device, device, _device);
3398    assert(handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
3399    pMemoryFdProperties->memoryTypeBits =
3400       (1 << device->physical_device->memory.type_count) - 1;
3401    return VK_SUCCESS;
3402 }
3403 
3404 VKAPI_ATTR void VKAPI_CALL
3405 tu_GetPhysicalDeviceMultisamplePropertiesEXT(
3406    VkPhysicalDevice                            physicalDevice,
3407    VkSampleCountFlagBits                       samples,
3408    VkMultisamplePropertiesEXT*                 pMultisampleProperties)
3409 {
3410    VK_FROM_HANDLE(tu_physical_device, pdevice, physicalDevice);
3411 
3412    if (samples <= VK_SAMPLE_COUNT_4_BIT && pdevice->vk.supported_extensions.EXT_sample_locations)
3413       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 1, 1 };
3414    else
3415       pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){ 0, 0 };
3416 }
3417 
3418 uint64_t tu_GetDeviceMemoryOpaqueCaptureAddress(
3419     VkDevice                                    device,
3420     const VkDeviceMemoryOpaqueCaptureAddressInfo* pInfo)
3421 {
3422    VK_FROM_HANDLE(tu_device_memory, mem, pInfo->memory);
3423    return mem->bo->iova;
3424 }
3425 
3426 struct tu_debug_bos_entry {
3427    uint32_t count;
3428    uint64_t size;
3429    const char *name;
3430 };
3431 
3432 const char *
3433 tu_debug_bos_add(struct tu_device *dev, uint64_t size, const char *name)
3434 {
3435    assert(name);
3436 
3437    if (likely(!dev->bo_sizes))
3438       return NULL;
3439 
3440    mtx_lock(&dev->bo_mutex);
3441    struct hash_entry *entry = _mesa_hash_table_search(dev->bo_sizes, name);
3442    struct tu_debug_bos_entry *debug_bos;
3443 
3444    if (!entry) {
3445       debug_bos = (struct tu_debug_bos_entry *) calloc(
3446          1, sizeof(struct tu_debug_bos_entry));
3447       debug_bos->name = strdup(name);
3448       _mesa_hash_table_insert(dev->bo_sizes, debug_bos->name, debug_bos);
3449    } else {
3450       debug_bos = (struct tu_debug_bos_entry *) entry->data;
3451    }
3452 
3453    debug_bos->count++;
3454    debug_bos->size += align(size, 4096);
3455    mtx_unlock(&dev->bo_mutex);
3456 
3457    return debug_bos->name;
3458 }
3459 
3460 void
3461 tu_debug_bos_del(struct tu_device *dev, struct tu_bo *bo)
3462 {
3463    if (likely(!dev->bo_sizes) || !bo->name)
3464       return;
3465 
3466    mtx_lock(&dev->bo_mutex);
3467    struct hash_entry *entry =
3468       _mesa_hash_table_search(dev->bo_sizes, bo->name);
3469    /* If we're finishing the BO, it should have been added already */
3470    assert(entry);
3471 
3472    struct tu_debug_bos_entry *debug_bos =
3473       (struct tu_debug_bos_entry *) entry->data;
3474    debug_bos->count--;
3475    debug_bos->size -= align(bo->size, 4096);
3476    if (!debug_bos->count) {
3477       _mesa_hash_table_remove(dev->bo_sizes, entry);
3478       free((void *) debug_bos->name);
3479       free(debug_bos);
3480    }
3481    mtx_unlock(&dev->bo_mutex);
3482 }
3483 
3484 static int debug_bos_count_compare(const void *in_a, const void *in_b)
3485 {
3486    struct tu_debug_bos_entry *a = *(struct tu_debug_bos_entry **)in_a;
3487    struct tu_debug_bos_entry *b = *(struct tu_debug_bos_entry **)in_b;
3488    return a->count - b->count;
3489 }
3490 
3491 void
3492 tu_debug_bos_print_stats(struct tu_device *dev)
3493 {
3494    if (likely(!dev->bo_sizes))
3495       return;
3496 
3497    mtx_lock(&dev->bo_mutex);
3498 
3499    /* Put the HT's sizes data in an array so we can sort by number of allocations. */
3500    struct util_dynarray dyn;
3501    util_dynarray_init(&dyn, NULL);
3502 
3503    uint32_t size = 0;
3504    uint32_t count = 0;
3505    hash_table_foreach(dev->bo_sizes, entry)
3506    {
3507       struct tu_debug_bos_entry *debug_bos =
3508          (struct tu_debug_bos_entry *) entry->data;
3509       util_dynarray_append(&dyn, struct tu_debug_bos_entry *, debug_bos);
3510       size += debug_bos->size / 1024;
3511       count += debug_bos->count;
3512    }
3513 
3514    qsort(dyn.data,
3515          util_dynarray_num_elements(&dyn, struct tu_debug_bos_entry *),
3516          sizeof(struct tu_debug_bos_entry *), debug_bos_count_compare);
3517 
3518    util_dynarray_foreach(&dyn, struct tu_debug_bos_entry *, entryp)
3519    {
3520       struct tu_debug_bos_entry *debug_bos = *entryp;
3521       mesa_logi("%30s: %4d bos, %lld kb\n", debug_bos->name, debug_bos->count,
3522                 (long long) (debug_bos->size / 1024));
3523    }
3524 
3525    mesa_logi("submitted %d bos (%d MB)\n", count, DIV_ROUND_UP(size, 1024));
3526 
3527    util_dynarray_fini(&dyn);
3528 
3529    mtx_unlock(&dev->bo_mutex);
3530 }
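
/* Example TU_DEBUG=bos output (values are illustrative only):
 *
 *         vkAllocateMemory(64kb):   12 bos, 768 kb
 *             pipeline_suballoc:     3 bos, 384 kb
 *    submitted 15 bos (2 MB)
 */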
3531 
3532 void
3533 tu_dump_bo_init(struct tu_device *dev, struct tu_bo *bo)
3534 {
3535    bo->dump_bo_list_idx = ~0;
3536 
3537    if (!FD_RD_DUMP(ENABLE))
3538       return;
3539 
3540    mtx_lock(&dev->bo_mutex);
3541    uint32_t idx =
3542       util_dynarray_num_elements(&dev->dump_bo_list, struct tu_bo *);
3543    bo->dump_bo_list_idx = idx;
3544    util_dynarray_append(&dev->dump_bo_list, struct tu_bo *, bo);
3545    mtx_unlock(&dev->bo_mutex);
3546 }
3547 
3548 void
3549 tu_dump_bo_del(struct tu_device *dev, struct tu_bo *bo)
3550 {
3551    if (bo->dump_bo_list_idx != ~0) {
3552       mtx_lock(&dev->bo_mutex);
3553       struct tu_bo *exchanging_bo =
3554          util_dynarray_pop(&dev->dump_bo_list, struct tu_bo *);
3555       *util_dynarray_element(&dev->dump_bo_list, struct tu_bo *,
3556                              bo->dump_bo_list_idx) = exchanging_bo;
3557       exchanging_bo->dump_bo_list_idx = bo->dump_bo_list_idx;
3558       mtx_unlock(&dev->bo_mutex);
3559    }
3560 }
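
/* Deletion is O(1) swap-with-last: the popped tail BO takes over both the
 * vacated slot and its cached index, keeping dump_bo_list dense.
 */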
3561 
3562 void
3563 tu_CmdBeginDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer,
3564                               const VkDebugUtilsLabelEXT *pLabelInfo)
3565 {
3566    VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3567 
3568    vk_common_CmdBeginDebugUtilsLabelEXT(_commandBuffer, pLabelInfo);
3569 
3570    /* Note that the spec says:
3571     *
3572     * "An application may open a debug label region in one command buffer and
3573     *  close it in another, or otherwise split debug label regions across
3574     *  multiple command buffers or multiple queue submissions. When viewed
3575     *  from the linear series of submissions to a single queue, the calls to
3576     *  vkCmdBeginDebugUtilsLabelEXT and vkCmdEndDebugUtilsLabelEXT must be
3577     *  matched and balanced."
3578     *
3579     * But if you're beginning labeling during a renderpass and ending outside
3580     * it, or vice versa, these trace ranges in perfetto will be unbalanced.  I
3581     * expect that u_trace and perfetto will do something like take just one of
3582     * the begins/ends, or drop the event entirely, but not crash.  Similarly,
3583     * I think we'll have problems if the tracepoints are split across cmd
3584     * buffers. Still, getting the simple case of cmd buffer annotation into
3585     * perfetto should prove useful.
3586     */
3587    const char *label = pLabelInfo->pLabelName;
3588    if (cmd_buffer->state.pass) {
3589       trace_start_cmd_buffer_annotation_rp(
3590          &cmd_buffer->trace, &cmd_buffer->draw_cs, strlen(label), label);
3591    } else {
3592       trace_start_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs,
3593                                         strlen(label), label);
3594    }
3595 }
3596 
3597 void
3598 tu_CmdEndDebugUtilsLabelEXT(VkCommandBuffer _commandBuffer)
3599 {
3600    VK_FROM_HANDLE(tu_cmd_buffer, cmd_buffer, _commandBuffer);
3601 
3602    if (cmd_buffer->vk.labels.size > 0) {
3603       if (cmd_buffer->state.pass) {
3604          trace_end_cmd_buffer_annotation_rp(&cmd_buffer->trace,
3605                                             &cmd_buffer->draw_cs);
3606       } else {
3607          trace_end_cmd_buffer_annotation(&cmd_buffer->trace, &cmd_buffer->cs);
3608       }
3609    }
3610 
3611    vk_common_CmdEndDebugUtilsLabelEXT(_commandBuffer);
3612 }
3613