/*
 * Copyright © 2019 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/sysinfo.h>
#include <unistd.h>
#include <xf86drm.h>
#include <xf86drmMode.h>

#ifdef MAJOR_IN_MKDEV
#include <sys/mkdev.h>
#endif
#ifdef MAJOR_IN_SYSMACROS
#include <sys/sysmacros.h>
#endif

#include "v3dv_private.h"

#include "common/v3d_debug.h"

#include "compiler/v3d_compiler.h"

#include "drm-uapi/v3d_drm.h"
#include "vk_android.h"
#include "vk_drm_syncobj.h"
#include "vk_util.h"
#include "git_sha1.h"

#include "util/build_id.h"
#include "util/os_file.h"
#include "util/u_debug.h"
#include "util/format/u_format.h"

#if DETECT_OS_ANDROID
#include <vndk/hardware_buffer.h>
#include "util/u_gralloc/u_gralloc.h"
#endif

#ifdef VK_USE_PLATFORM_XCB_KHR
#include <xcb/xcb.h>
#include <xcb/dri3.h>
#include <X11/Xlib-xcb.h>
#endif

#ifdef VK_USE_PLATFORM_WAYLAND_KHR
#include <wayland-client.h>
#include "wayland-drm-client-protocol.h"
#endif

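/* VK_MAKE_VERSION packs its arguments as (major << 22) | (minor << 12) | patch,
 * so using VK_HEADER_VERSION as the patch component simply advertises the
 * Vulkan header revision this driver was built against.
 */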
#define V3DV_API_VERSION VK_MAKE_VERSION(1, 3, VK_HEADER_VERSION)

#ifdef ANDROID_STRICT
#if ANDROID_API_LEVEL <= 32
/* Android 12.1 and lower support only Vulkan API v1.1 */
#undef V3DV_API_VERSION
#define V3DV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#endif
#endif

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = V3DV_API_VERSION;
   return VK_SUCCESS;
}

#if defined(VK_USE_PLATFORM_WIN32_KHR) || \
    defined(VK_USE_PLATFORM_WAYLAND_KHR) || \
    defined(VK_USE_PLATFORM_XCB_KHR) || \
    defined(VK_USE_PLATFORM_XLIB_KHR) || \
    defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define V3DV_USE_WSI_PLATFORM
#endif

static const struct vk_instance_extension_table instance_extensions = {
   .KHR_device_group_creation = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_acquire_drm_display = true,
#endif
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
#ifdef V3DV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
   .EXT_surface_maintenance1 = true,
   .EXT_swapchain_colorspace = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void
get_device_extensions(const struct v3dv_physical_device *device,
                      struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table) {
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_device_group = true,
      .KHR_driver_properties = true,
      .KHR_descriptor_update_template = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_dynamic_rendering = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
      .KHR_index_type_uint8 = true,
      .KHR_line_rasterization = true,
      .KHR_load_store_op_none = true,
      .KHR_performance_query = device->caps.perfmon,
      .KHR_relaxed_block_layout = true,
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_maintenance4 = true,
      .KHR_maintenance5 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_expect_assume = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_relaxed_extended_instruction = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_shader_integer_dot_product = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_synchronization2 = true,
      .KHR_workgroup_memory_explicit_layout = true,
#ifdef V3DV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
      .KHR_incremental_present = true,
#endif
      .KHR_variable_pointers = true,
      .KHR_vertex_attribute_divisor = true,
      .KHR_vulkan_memory_model = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_attachment_feedback_loop_layout = true,
      .EXT_border_color_swizzle = true,
      .EXT_color_write_enable = true,
      .EXT_custom_border_color = true,
      .EXT_depth_clamp_zero_one = device->devinfo.ver >= 71,
      .EXT_depth_clip_control = true,
      .EXT_depth_clip_enable = device->devinfo.ver >= 71,
      .EXT_load_store_op_none = true,
      .EXT_inline_uniform_block = true,
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = true,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_multi_draw = true,
      .EXT_physical_device_drm = true,
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_pipeline_robustness = true,
      .EXT_primitive_topology_list_restart = true,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_separate_stencil_usage = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_module_identifier = true,
      .EXT_subgroup_size_control = true,
#ifdef V3DV_USE_WSI_PLATFORM
      .EXT_swapchain_maintenance1 = true,
#endif
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
      .EXT_vertex_attribute_divisor = true,
   };
#if DETECT_OS_ANDROID
   if (vk_android_get_ugralloc() != NULL) {
      ext->ANDROID_external_memory_android_hardware_buffer = true;
      ext->ANDROID_native_buffer = true;
   }
#endif
}

static void
get_features(const struct v3dv_physical_device *physical_device,
             struct vk_features *features)
{
   *features = (struct vk_features) {
      /* Vulkan 1.0 */
      .robustBufferAccess = true, /* This feature is mandatory */
      .fullDrawIndexUint32 = physical_device->devinfo.ver >= 71,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = true,
      .multiDrawIndirect = false,
      .drawIndirectFirstInstance = true,
      .depthClamp = physical_device->devinfo.ver >= 71,
      .depthClampZeroOne = physical_device->devinfo.ver >= 71,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = physical_device->devinfo.ver >= 71,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = true,
      /* Note that textureCompressionBC requires that the driver support all
       * the BC formats. V3D 4.2 only supports BC1-3, so we can't claim that
       * we support it.
       */
      .textureCompressionBC = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = false,
      .shaderSampledImageArrayDynamicIndexing = false,
      .shaderStorageBufferArrayDynamicIndexing = false,
      .shaderStorageImageArrayDynamicIndexing = false,
      .shaderClipDistance = true,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = false,
      .shaderInt16 = false,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = true,

      /* Vulkan 1.1 */
      .storageBuffer16BitAccess = true,
      .uniformAndStorageBuffer16BitAccess = true,
      .storagePushConstant16 = true,
      .storageInputOutput16 = false,
      .multiview = true,
      .multiviewGeometryShader = false,
      .multiviewTessellationShader = false,
      .variablePointersStorageBuffer = true,
      /* FIXME: this needs support for non-constant index on UBO/SSBO */
      .variablePointers = false,
      .protectedMemory = false,
      .samplerYcbcrConversion = true,
      .shaderDrawParameters = false,

      /* Vulkan 1.2 */
      .hostQueryReset = true,
      .uniformAndStorageBuffer8BitAccess = true,
      .uniformBufferStandardLayout = true,
      /* V3D 4.2 wraps TMU vector accesses to 16-byte boundaries, so loads and
       * stores of vectors that cross these boundaries would not work correctly
       * with scalarBlockLayout and would need to be split into smaller vectors
       * (and/or scalars) that don't cross these boundaries. For load/stores
       * with dynamic offsets where we can't identify if the offset is
       * problematic, we would always have to scalarize. Overall, this would
       * not lead to best performance so let's just not support it.
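       *
       * For example, with scalarBlockLayout a vec4 load at SSBO offset 8
       * would straddle the 16-byte windows [0, 16) and [16, 32), which the
       * TMU would wrap rather than read linearly.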
       */
      .scalarBlockLayout = physical_device->devinfo.ver >= 71,
      /* This tells applications 2 things:
       *
       * 1. If they can select just one aspect for barriers. For us barriers
       *    decide if we need to split a job and we don't care if it is only
       *    for one of the aspects of the image or both, so we don't really
       *    benefit from seeing barriers that select just one aspect.
       *
       * 2. If they can program different layouts for each aspect. We
       *    generally don't care about layouts, so again, we don't get any
       *    benefits from this to limit the scope of image layout transitions.
       *
       * Still, Vulkan 1.2 requires this feature to be supported so we
       * advertise it even though we don't really take advantage of it.
       */
      .separateDepthStencilLayouts = true,
      .storageBuffer8BitAccess = true,
      .storagePushConstant8 = true,
      .imagelessFramebuffer = true,
      .timelineSemaphore = true,

      .samplerMirrorClampToEdge = true,

      /* Extended subgroup types are mandatory in Vulkan 1.2; however, they
       * only take effect if the implementation supports non-32-bit types,
       * which we don't, so in practice setting this to true doesn't have
       * any implications for us.
       */
      .shaderSubgroupExtendedTypes = true,
      .subgroupBroadcastDynamicId = true,

      .vulkanMemoryModel = true,
      .vulkanMemoryModelDeviceScope = true,
      .vulkanMemoryModelAvailabilityVisibilityChains = true,

      .bufferDeviceAddress = true,
      .bufferDeviceAddressCaptureReplay = false,
      .bufferDeviceAddressMultiDevice = false,

      /* Vulkan 1.3 */
      .inlineUniformBlock = true,
      /* Inline buffers work like push constants, so after they are bound
       * some of their contents may be copied into the uniform stream as soon
       * as the next draw/dispatch is recorded in the command buffer. This means
       * that if the client updates the buffer contents after binding it to
       * a command buffer, the next queue submit of that command buffer may
       * not use the latest update to the buffer contents, but the data that
       * was present in the buffer at the time it was bound to the command
       * buffer.
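       *
       * Concretely: a vkCmdBindDescriptorSets() followed by a vkCmdDraw() may
       * snapshot the inline block's contents, so a host write made after
       * recording is not guaranteed to be visible to a later submit of that
       * command buffer.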
       */
      .descriptorBindingInlineUniformBlockUpdateAfterBind = false,
      .pipelineCreationCacheControl = true,
      .privateData = true,
      .maintenance4 = true,
      .shaderZeroInitializeWorkgroupMemory = true,
      .synchronization2 = true,
      .robustImageAccess = true,
      .shaderIntegerDotProduct = true,

      /* VK_EXT_4444_formats */
      .formatA4R4G4B4 = true,
      .formatA4B4G4R4 = true,

      /* VK_EXT_custom_border_color */
      .customBorderColors = true,
      .customBorderColorWithoutFormat = false,

      /* VK_EXT_index_type_uint8 */
      .indexTypeUint8 = true,

      /* VK_EXT_line_rasterization */
      .rectangularLines = true,
      .bresenhamLines = true,
      .smoothLines = true,
      .stippledRectangularLines = false,
      .stippledBresenhamLines = false,
      .stippledSmoothLines = false,

      /* VK_EXT_color_write_enable */
      .colorWriteEnable = true,

      /* VK_EXT_extended_dynamic_state */
      .extendedDynamicState = true,

      /* VK_EXT_extended_dynamic_state2 */
      .extendedDynamicState2 = true,
      /* We don't support extendedDynamicState2LogicOp as that would require
       * compiling shader variants after pipeline creation.
       */
      .extendedDynamicState2LogicOp = false,
      /* We don't support extendedDynamicState2PatchControlPoints as we don't
       * support tessellation shaders.
       */
      .extendedDynamicState2PatchControlPoints = false,

      /* VK_KHR_pipeline_executable_properties */
      .pipelineExecutableInfo = true,

      /* VK_EXT_provoking_vertex */
      .provokingVertexLast = true,
      /* FIXME: update when supporting EXT_transform_feedback */
      .transformFeedbackPreservesProvokingVertex = false,

      /* VK_EXT_vertex_attribute_divisor */
      .vertexAttributeInstanceRateDivisor = true,
      .vertexAttributeInstanceRateZeroDivisor = false,

      /* VK_KHR_performance_query */
      .performanceCounterQueryPools = physical_device->caps.perfmon,
      .performanceCounterMultipleQueryPools = false,

      /* VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_workgroup_memory_explicit_layout */
      .workgroupMemoryExplicitLayout = true,
      .workgroupMemoryExplicitLayoutScalarBlockLayout = false,
      .workgroupMemoryExplicitLayout8BitAccess = true,
      .workgroupMemoryExplicitLayout16BitAccess = true,

      /* VK_EXT_border_color_swizzle */
      .borderColorSwizzle = true,
      .borderColorSwizzleFromImage = true,

      /* VK_EXT_shader_module_identifier */
      .shaderModuleIdentifier = true,

      /* VK_EXT_depth_clip_control */
      .depthClipControl = true,

      /* VK_EXT_depth_clip_enable */
      .depthClipEnable = physical_device->devinfo.ver >= 71,

      /* VK_EXT_attachment_feedback_loop_layout */
      .attachmentFeedbackLoopLayout = true,

      /* VK_EXT_primitive_topology_list_restart */
      .primitiveTopologyListRestart = true,
      /* FIXME: we don't support tessellation shaders yet */
      .primitiveTopologyPatchListRestart = false,

      /* VK_EXT_pipeline_robustness */
      .pipelineRobustness = true,

      /* VK_EXT_multi_draw */
      .multiDraw = true,

      /* VK_KHR_shader_terminate_invocation */
      .shaderTerminateInvocation = true,

      /* VK_EXT_shader_demote_to_helper_invocation */
      .shaderDemoteToHelperInvocation = true,

      /* VK_EXT_subgroup_size_control */
      .subgroupSizeControl = true,
      .computeFullSubgroups = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,

      /* VK_KHR_dynamic_rendering */
      .dynamicRendering = true,

      /* VK_KHR_maintenance5 */
      .maintenance5 = true,

#ifdef V3DV_USE_WSI_PLATFORM
      /* VK_EXT_swapchain_maintenance1 */
      .swapchainMaintenance1 = true,
#endif

      /* VK_KHR_shader_relaxed_extended_instruction */
      .shaderRelaxedExtendedInstruction = true,
   };
}

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                          uint32_t *pPropertyCount,
                                          VkExtensionProperties *pProperties)
{
   /* We don't support any layers */
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(
      &instance_extensions, pPropertyCount, pProperties);
}

static VkResult enumerate_devices(struct vk_instance *vk_instance);

static void destroy_physical_device(struct vk_physical_device *device);

VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator,
                    VkInstance *pInstance)
{
   struct v3dv_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (pAllocator == NULL)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator, sizeof(*instance), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &v3dv_instance_entrypoints, true);
   vk_instance_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_instance_entrypoints, false);

   result = vk_instance_init(&instance->vk,
                             &instance_extensions,
                             &dispatch_table,
                             pCreateInfo, pAllocator);

   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   v3d_process_debug_variable();

   instance->vk.physical_devices.enumerate = enumerate_devices;
   instance->vk.physical_devices.destroy = destroy_physical_device;

   /* We start with the default values for the pipeline_cache envvars.
    *
    * FIXME: with so many options now, perhaps we could use parse_debug_string
    */
   instance->pipeline_cache_enabled = true;
   instance->default_pipeline_cache_enabled = true;
   instance->meta_cache_enabled = true;
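   /* Values are prefix-matched below, e.g. running an application with
    * V3DV_ENABLE_PIPELINE_CACHE=off disables all three caches at once.
    */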
   const char *pipeline_cache_str = getenv("V3DV_ENABLE_PIPELINE_CACHE");
   if (pipeline_cache_str != NULL) {
      if (strncmp(pipeline_cache_str, "full", 4) == 0) {
         /* nothing to do, just to filter correct values */
      } else if (strncmp(pipeline_cache_str, "no-default-cache", 16) == 0) {
         instance->default_pipeline_cache_enabled = false;
      } else if (strncmp(pipeline_cache_str, "no-meta-cache", 13) == 0) {
         instance->meta_cache_enabled = false;
      } else if (strncmp(pipeline_cache_str, "off", 3) == 0) {
         instance->pipeline_cache_enabled = false;
         instance->default_pipeline_cache_enabled = false;
         instance->meta_cache_enabled = false;
      } else {
         mesa_loge("Invalid value for envvar V3DV_ENABLE_PIPELINE_CACHE. "
                   "Allowed values are: full, no-default-cache, no-meta-cache, off\n");
      }
   }

   if (instance->pipeline_cache_enabled == false) {
      mesa_logw("v3dv pipeline cache is disabled. Performance "
                "may be negatively affected\n");
   }
   if (instance->default_pipeline_cache_enabled == false) {
      mesa_logw("default v3dv pipeline cache is disabled. "
                "Performance may be negatively affected\n");
   }
   if (instance->meta_cache_enabled == false) {
      mesa_logw("custom pipeline cache for meta operations is disabled. "
                "Performance may be negatively affected\n");
   }

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

#if DETECT_OS_ANDROID
   struct u_gralloc *u_gralloc = vk_android_init_ugralloc();

   if (u_gralloc && u_gralloc_get_type(u_gralloc) == U_GRALLOC_TYPE_FALLBACK) {
      mesa_logw(
         "v3dv: Gralloc is not supported. Android extensions are disabled.");
      vk_android_destroy_ugralloc();
   }
#endif

   *pInstance = v3dv_instance_to_handle(instance);

   return VK_SUCCESS;
}

static void
v3dv_physical_device_free_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   if (device->disk_cache)
      disk_cache_destroy(device->disk_cache);
#else
   assert(device->disk_cache == NULL);
#endif
}

static void
physical_device_finish(struct v3dv_physical_device *device)
{
   v3dv_wsi_finish(device);
   v3dv_physical_device_free_disk_cache(device);
   v3d_compiler_free(device->compiler);

   util_sparse_array_finish(&device->bo_map);

   if (device->perfcntr)
      v3d_perfcntrs_fini(device->perfcntr);

   close(device->render_fd);
   if (device->display_fd >= 0)
      close(device->display_fd);

   free(device->name);

#if USE_V3D_SIMULATOR
   v3d_simulator_destroy(device->sim_file);
#endif

   vk_physical_device_finish(&device->vk);
   mtx_destroy(&device->mutex);
}

static void
destroy_physical_device(struct vk_physical_device *device)
{
   physical_device_finish((struct v3dv_physical_device *)device);
   vk_free(&device->instance->alloc, device);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyInstance(VkInstance _instance,
                     const VkAllocationCallbacks *pAllocator)
{
   V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);

   if (!instance)
      return;

#if DETECT_OS_ANDROID
   vk_android_destroy_ugralloc();
#endif

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t
compute_heap_size()
{
#if !USE_V3D_SIMULATOR
   /* Query the total ram from the system */
   struct sysinfo info;
   sysinfo(&info);

   uint64_t total_ram = (uint64_t)info.totalram * (uint64_t)info.mem_unit;
#else
   uint64_t total_ram = (uint64_t) v3d_simulator_get_mem_size();
#endif

   /* We don't want to burn too much ram with the GPU. If the user has 4GB
    * or less, we use at most half. If they have more than 4GB we limit it
    * to 3/4 with a max. of 4GB since the GPU cannot address more than that.
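    *
    * For example, a 2GB system reports a 1GB heap, while an 8GB system
    * reports min(4GB, 6GB) = 4GB.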
    */
   const uint64_t MAX_HEAP_SIZE = 4ull * 1024ull * 1024ull * 1024ull;
   uint64_t available;
   if (total_ram <= MAX_HEAP_SIZE)
      available = total_ram / 2;
   else
      available = MIN2(MAX_HEAP_SIZE, total_ram * 3 / 4);

   return available;
}

static uint64_t
compute_memory_budget(struct v3dv_physical_device *device)
{
   uint64_t heap_size = device->memory.memoryHeaps[0].size;
   uint64_t heap_used = device->heap_used;
   uint64_t sys_available;
#if !USE_V3D_SIMULATOR
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);
#else
   sys_available = (uint64_t) v3d_simulator_get_mem_free();
#endif

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
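    *
    * E.g. with a 4GB heap, 1GB already allocated by us and 2GB of free
    * system memory, the reported budget is min(4GB, 1GB + 1.8GB) = 2.8GB.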
    */
   uint64_t heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

static bool
v3d_has_feature(struct v3dv_physical_device *device, enum drm_v3d_param feature)
{
   struct drm_v3d_get_param p = {
      .param = feature,
   };
   if (v3d_ioctl(device->render_fd, DRM_IOCTL_V3D_GET_PARAM, &p) != 0)
      return false;
   return p.value;
}

static bool
device_has_expected_features(struct v3dv_physical_device *device)
{
   return v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_TFU) &&
          v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CSD) &&
          v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CACHE_FLUSH) &&
          device->caps.multisync;
}

static VkResult
init_uuids(struct v3dv_physical_device *device)
{
   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(init_uuids);
   if (!note) {
      return vk_errorf(device->vk.instance,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   unsigned build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(device->vk.instance,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "build-id too short. It needs to be a SHA-1");
   }

   memcpy(device->driver_build_sha1, build_id_data(note), 20);

   uint32_t vendor_id = v3dv_physical_device_vendor_id(device);
   uint32_t device_id = v3dv_physical_device_device_id(device);

   struct mesa_sha1 sha1_ctx;
   uint8_t sha1[20];
   STATIC_ASSERT(VK_UUID_SIZE <= sizeof(sha1));

   /* The pipeline cache UUID is used for determining when a pipeline cache is
    * invalid. It needs both the driver build and the device ID.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   /* The driver UUID is used for determining sharability of images and memory
    * between two Vulkan instances in separate processes. Consumers that want
    * to share memory also need to check the device UUID (below), so the
    * driver UUID only needs to be the build-id.
    */
   memcpy(device->driver_uuid, build_id_data(note), VK_UUID_SIZE);

   /* The device UUID uniquely identifies the given device within the machine.
    * Since we never have more than one device, this doesn't need to be a real
    * UUID.
    */
   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, &vendor_id, sizeof(vendor_id));
   _mesa_sha1_update(&sha1_ctx, &device_id, sizeof(device_id));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(device->device_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static void
v3dv_physical_device_init_disk_cache(struct v3dv_physical_device *device)
{
#ifdef ENABLE_SHADER_CACHE
   char timestamp[41];
   _mesa_sha1_format(timestamp, device->driver_build_sha1);

   assert(device->name);
   device->disk_cache = disk_cache_create(device->name, timestamp, v3d_mesa_debug);
#else
   device->disk_cache = NULL;
#endif
}

static void
get_device_properties(const struct v3dv_physical_device *device,
                      struct vk_properties *properties)
{
   STATIC_ASSERT(MAX_SAMPLED_IMAGES + MAX_STORAGE_IMAGES + MAX_INPUT_ATTACHMENTS
                 <= V3D_MAX_TEXTURE_SAMPLERS);
   STATIC_ASSERT(MAX_UNIFORM_BUFFERS >= MAX_DYNAMIC_UNIFORM_BUFFERS);
   STATIC_ASSERT(MAX_STORAGE_BUFFERS >= MAX_DYNAMIC_STORAGE_BUFFERS);

   const uint32_t page_size = 4096;
   const uint64_t mem_size = compute_heap_size();

   const uint32_t max_varying_components = 16 * 4;

   const uint32_t max_per_stage_resources = 128;

   const float v3d_point_line_granularity = 2.0f / (1 << V3D_COORD_SHIFT);
   const uint32_t max_fb_size = V3D_MAX_IMAGE_DIMENSION;

   const VkSampleCountFlags supported_sample_counts =
      VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT;

   const uint8_t max_rts = V3D_MAX_RENDER_TARGETS(device->devinfo.ver);

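   /* timestampPeriod is expressed in nanoseconds per tick. Here it is derived
    * from the host CLOCK_MONOTONIC resolution, which assumes timestamp
    * queries are sampled from that clock on the CPU.
    */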
   struct timespec clock_res;
   clock_getres(CLOCK_MONOTONIC, &clock_res);
   const float timestamp_period =
      clock_res.tv_sec * 1000000000.0f + clock_res.tv_nsec;

   /* We don't really have special restrictions for the maximum
    * descriptors per set, other than maybe not exceeding the limits
    * of addressable memory in a single allocation on either the host
    * or the GPU. This will be a much larger limit than any of the
    * per-stage limits already available in Vulkan though, so in practice,
    * it is not expected to limit anything beyond what is already
    * constrained through per-stage limits.
    */
   const uint32_t max_host_descriptors =
      (UINT32_MAX - sizeof(struct v3dv_descriptor_set)) /
      sizeof(struct v3dv_descriptor);
   const uint32_t max_gpu_descriptors =
      (UINT32_MAX / v3d_X((&device->devinfo), max_descriptor_bo_size)());

   VkSubgroupFeatureFlags subgroup_ops = VK_SUBGROUP_FEATURE_BASIC_BIT;
   if (device->devinfo.ver >= 71) {
      subgroup_ops |= VK_SUBGROUP_FEATURE_BALLOT_BIT |
                      VK_SUBGROUP_FEATURE_SHUFFLE_BIT |
                      VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT |
                      VK_SUBGROUP_FEATURE_VOTE_BIT |
                      VK_SUBGROUP_FEATURE_QUAD_BIT;
   }

#if DETECT_OS_ANDROID
   /* Used to determine the sharedImage prop in
    * VkPhysicalDevicePresentationPropertiesANDROID
    */
   uint64_t front_rendering_usage = 0;
   struct u_gralloc *gralloc = u_gralloc_create(U_GRALLOC_TYPE_AUTO);
   if (gralloc != NULL) {
      u_gralloc_get_front_rendering_usage(gralloc, &front_rendering_usage);
      u_gralloc_destroy(&gralloc);
   }
   VkBool32 shared_image = front_rendering_usage ? VK_TRUE : VK_FALSE;
#endif

   /* FIXME: this will probably require an in-depth review */
   *properties = (struct vk_properties) {
      /* VkPhysicalDeviceProperties, limits and sparse props below */
      .apiVersion = V3DV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = v3dv_physical_device_vendor_id(device),
      .deviceID = v3dv_physical_device_device_id(device),
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,

      /* Vulkan 1.0 limits */
      .maxImageDimension1D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimension2D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimension3D = V3D_MAX_IMAGE_DIMENSION,
      .maxImageDimensionCube = V3D_MAX_IMAGE_DIMENSION,
      .maxImageArrayLayers = V3D_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = (1ul << 28),
      .maxUniformBufferRange = V3D_MAX_BUFFER_RANGE,
      .maxStorageBufferRange = V3D_MAX_BUFFER_RANGE,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = mem_size / page_size,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = V3D_NON_COHERENT_ATOM_SIZE,
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = V3D_MAX_TEXTURE_SAMPLERS,
      .maxPerStageDescriptorUniformBuffers = MAX_UNIFORM_BUFFERS,
      .maxPerStageDescriptorStorageBuffers = MAX_STORAGE_BUFFERS,
      .maxPerStageDescriptorSampledImages = MAX_SAMPLED_IMAGES,
      .maxPerStageDescriptorStorageImages = MAX_STORAGE_IMAGES,
      .maxPerStageDescriptorInputAttachments = MAX_INPUT_ATTACHMENTS,
      .maxPerStageResources = max_per_stage_resources,

      .maxDescriptorSetSamplers =
         V3DV_SUPPORTED_SHADER_STAGES * V3D_MAX_TEXTURE_SAMPLERS,
      .maxDescriptorSetUniformBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_UNIFORM_BUFFERS,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_BUFFERS,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_SAMPLED_IMAGES,
      .maxDescriptorSetStorageImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_IMAGES,
      .maxDescriptorSetInputAttachments = MAX_INPUT_ATTACHMENTS,

      /* Vertex limits */
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = 0xffffffff,
      .maxVertexInputBindingStride = MESA_VK_MAX_VERTEX_BINDING_STRIDE,
      .maxVertexOutputComponents = max_varying_components,

      /* Tessellation limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry limits */
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 64,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,

      /* Fragment limits */
      .maxFragmentInputComponents = max_varying_components,
      .maxFragmentOutputAttachments = 4,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources = max_rts +
                                            MAX_STORAGE_BUFFERS +
                                            MAX_STORAGE_IMAGES,

      /* Compute limits */
      .maxComputeSharedMemorySize = 16384,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 256,
      .maxComputeWorkGroupSize = { 256, 256, 256 },

      .subPixelPrecisionBits = V3D_COORD_SHIFT,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = device->devinfo.ver >= 71 ?
                                  0xffffffff : 0x00ffffff,
      .maxDrawIndirectCount = 0x7fffffff,
      .maxSamplerLodBias = 14.0f,
      .maxSamplerAnisotropy = 16.0f,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { max_fb_size, max_fb_size },
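      /* The spec requires at least [-2 * size, 2 * size - 1] here */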
      .viewportBoundsRange = { -2.0 * max_fb_size,
                               2.0 * max_fb_size - 1 },
      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = page_size,
      .minTexelBufferOffsetAlignment = V3D_TMU_TEXEL_ALIGN,
      .minUniformBufferOffsetAlignment = 32,
      .minStorageBufferOffsetAlignment = 32,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = V3D_COORD_SHIFT,
      .maxFramebufferWidth = max_fb_size,
      .maxFramebufferHeight = max_fb_size,
      .maxFramebufferLayers = 256,
      .framebufferColorSampleCounts = supported_sample_counts,
      .framebufferDepthSampleCounts = supported_sample_counts,
      .framebufferStencilSampleCounts = supported_sample_counts,
      .framebufferNoAttachmentsSampleCounts = supported_sample_counts,
      .maxColorAttachments = max_rts,
      .sampledImageColorSampleCounts = supported_sample_counts,
      .sampledImageIntegerSampleCounts = supported_sample_counts,
      .sampledImageDepthSampleCounts = supported_sample_counts,
      .sampledImageStencilSampleCounts = supported_sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
      .timestampPeriod = timestamp_period,
      .maxClipDistances = 8,
      .maxCullDistances = 0,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = { v3d_point_line_granularity,
                          V3D_MAX_POINT_SIZE },
      .lineWidthRange = { 1.0f, V3D_MAX_LINE_WIDTH },
      .pointSizeGranularity = v3d_point_line_granularity,
      .lineWidthGranularity = v3d_point_line_granularity,
      .strictLines = true,
      .standardSampleLocations = false,
      .optimalBufferCopyOffsetAlignment = 32,
      .optimalBufferCopyRowPitchAlignment = 32,
      .nonCoherentAtomSize = V3D_NON_COHERENT_ATOM_SIZE,

      /* Vulkan 1.0 sparse properties */
      .sparseResidencyStandard2DBlockShape = false,
      .sparseResidencyStandard2DMultisampleBlockShape = false,
      .sparseResidencyStandard3DBlockShape = false,
      .sparseResidencyAlignedMipSize = false,
      .sparseResidencyNonResidentStrict = false,

      /* Vulkan 1.1 properties */
      .deviceLUIDValid = false,
      .subgroupSize = V3D_CHANNELS,
      .subgroupSupportedStages = VK_SHADER_STAGE_COMPUTE_BIT |
                                 VK_SHADER_STAGE_FRAGMENT_BIT,
      .subgroupSupportedOperations = VK_SUBGROUP_FEATURE_BASIC_BIT,
      .subgroupQuadOperationsInAllStages = false,
      .pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES,
      .maxMultiviewViewCount = MAX_MULTIVIEW_VIEW_COUNT,
      .maxMultiviewInstanceIndex = UINT32_MAX - 1,
      .protectedNoFault = false,
      .maxPerSetDescriptors = MIN2(max_host_descriptors, max_gpu_descriptors),
      /* Minimum required by the spec */
      .maxMemoryAllocationSize = MAX_MEMORY_ALLOCATION_SIZE,

      /* Vulkan 1.2 properties */
      .driverID = VK_DRIVER_ID_MESA_V3DV,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 8,
         .patch = 3,
      },
      .supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
      .supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
      /* FIXME: if we want to support independentResolveNone then we would
       * need to honor attachment load operations on resolve attachments,
       * which we currently ignore because the resolve makes them irrelevant,
       * as it unconditionally writes all pixels in the render area. However,
       * with independentResolveNone, it is possible to have one aspect of a
       * D/S resolve attachment stay unresolved, in which case the attachment
       * load operation is relevant.
       *
       * NOTE: implementing attachment load for resolve attachments isn't
       * immediately trivial because these attachments are not part of the
       * framebuffer and therefore we can't use the same mechanism we use
       * for framebuffer attachments. Instead, we would probably have to
       * emit a meta operation for that right at the start of the render
       * pass (or subpass).
       */
      .independentResolveNone = false,
      .independentResolve = false,
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      .denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL,
      .shaderSignedZeroInfNanPreserveFloat16 = true,
      .shaderSignedZeroInfNanPreserveFloat32 = true,
      .shaderSignedZeroInfNanPreserveFloat64 = false,
      .shaderDenormPreserveFloat16 = true,
      .shaderDenormPreserveFloat32 = true,
      .shaderDenormPreserveFloat64 = false,
      .shaderDenormFlushToZeroFloat16 = false,
      .shaderDenormFlushToZeroFloat32 = false,
      .shaderDenormFlushToZeroFloat64 = false,
      .shaderRoundingModeRTEFloat16 = true,
      .shaderRoundingModeRTEFloat32 = true,
      .shaderRoundingModeRTEFloat64 = false,
      .shaderRoundingModeRTZFloat16 = false,
      .shaderRoundingModeRTZFloat32 = false,
      .shaderRoundingModeRTZFloat64 = false,

      .maxPerStageDescriptorUpdateAfterBindSamplers = V3D_MAX_TEXTURE_SAMPLERS,
      .maxPerStageDescriptorUpdateAfterBindUniformBuffers = MAX_UNIFORM_BUFFERS,
      .maxPerStageDescriptorUpdateAfterBindStorageBuffers = MAX_STORAGE_BUFFERS,
      .maxPerStageDescriptorUpdateAfterBindSampledImages = MAX_SAMPLED_IMAGES,
      .maxPerStageDescriptorUpdateAfterBindStorageImages = MAX_STORAGE_IMAGES,
      .maxPerStageDescriptorUpdateAfterBindInputAttachments = MAX_INPUT_ATTACHMENTS,
      .maxPerStageUpdateAfterBindResources = max_per_stage_resources,
      .maxDescriptorSetUpdateAfterBindSamplers =
         V3DV_SUPPORTED_SHADER_STAGES * V3D_MAX_TEXTURE_SAMPLERS,
      .maxDescriptorSetUpdateAfterBindUniformBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindStorageBuffers =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_BUFFERS,
      .maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetUpdateAfterBindSampledImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_SAMPLED_IMAGES,
      .maxDescriptorSetUpdateAfterBindStorageImages =
         V3DV_SUPPORTED_SHADER_STAGES * MAX_STORAGE_IMAGES,
      .maxDescriptorSetUpdateAfterBindInputAttachments = MAX_INPUT_ATTACHMENTS,

      /* V3D doesn't support min/max filtering */
      .filterMinmaxSingleComponentFormats = false,
      .filterMinmaxImageComponentMapping = false,

      .framebufferIntegerColorSampleCounts =
         VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_4_BIT,

      /* Vulkan 1.3 properties */
      .maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE,
      .maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
      .maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BUFFERS,
      .maxInlineUniformTotalSize =
         MAX_INLINE_UNIFORM_BUFFERS * MAX_INLINE_UNIFORM_BLOCK_SIZE,
      .maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
         MAX_INLINE_UNIFORM_BUFFERS,
      .maxDescriptorSetUpdateAfterBindInlineUniformBlocks =
         MAX_INLINE_UNIFORM_BUFFERS,
      .maxBufferSize = V3D_MAX_BUFFER_RANGE,
      .storageTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
      .storageTexelBufferOffsetSingleTexelAlignment = false,
      .uniformTexelBufferOffsetAlignmentBytes = V3D_TMU_TEXEL_ALIGN,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
      /* No native acceleration for integer dot product. We use NIR lowering. */
      .integerDotProduct8BitUnsignedAccelerated = false,
      .integerDotProduct8BitSignedAccelerated = false,
      .integerDotProduct8BitMixedSignednessAccelerated = false,
      .integerDotProduct4x8BitPackedUnsignedAccelerated = false,
      .integerDotProduct4x8BitPackedSignedAccelerated = false,
      .integerDotProduct4x8BitPackedMixedSignednessAccelerated = false,
      .integerDotProduct16BitUnsignedAccelerated = false,
      .integerDotProduct16BitSignedAccelerated = false,
      .integerDotProduct16BitMixedSignednessAccelerated = false,
      .integerDotProduct32BitUnsignedAccelerated = false,
      .integerDotProduct32BitSignedAccelerated = false,
      .integerDotProduct32BitMixedSignednessAccelerated = false,
      .integerDotProduct64BitUnsignedAccelerated = false,
      .integerDotProduct64BitSignedAccelerated = false,
      .integerDotProduct64BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating8BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating4x8BitPackedMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating16BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating32BitMixedSignednessAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitUnsignedAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitSignedAccelerated = false,
      .integerDotProductAccumulatingSaturating64BitMixedSignednessAccelerated = false,

      /* VkPhysicalDeviceCustomBorderColorPropertiesEXT */
      .maxCustomBorderColorSamplers = V3D_MAX_TEXTURE_SAMPLERS,

      /* VkPhysicalDeviceProvokingVertexPropertiesEXT */
      .provokingVertexModePerPipeline = true,
      /* FIXME: update when supporting EXT_transform_feedback */
      .transformFeedbackPreservesTriangleFanProvokingVertex = false,

      /* VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT */
      .maxVertexAttribDivisor = V3D_MAX_VERTEX_ATTRIB_DIVISOR,
      .supportsNonZeroFirstInstance = true,

      /* VkPhysicalDevicePerformanceQueryPropertiesKHR */
      .allowCommandBufferQueryCopies = true,

#if DETECT_OS_ANDROID
      /* VkPhysicalDevicePresentationPropertiesANDROID */
      .sharedImage = shared_image,
#endif

      /* VkPhysicalDeviceDrmPropertiesEXT */
      .drmHasPrimary = device->has_primary,
      .drmPrimaryMajor = (int64_t) major(device->primary_devid),
      .drmPrimaryMinor = (int64_t) minor(device->primary_devid),
      .drmHasRender = device->has_render,
      .drmRenderMajor = (int64_t) major(device->render_devid),
      .drmRenderMinor = (int64_t) minor(device->render_devid),

      /* VkPhysicalDeviceLineRasterizationPropertiesEXT */
      .lineSubPixelPrecisionBits = V3D_COORD_SHIFT,

      /* VkPhysicalDevicePipelineRobustnessPropertiesEXT */
      .defaultRobustnessStorageBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessUniformBuffers =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessVertexInputs =
         VK_PIPELINE_ROBUSTNESS_BUFFER_BEHAVIOR_DEVICE_DEFAULT_EXT,
      .defaultRobustnessImages =
         VK_PIPELINE_ROBUSTNESS_IMAGE_BEHAVIOR_DEVICE_DEFAULT_EXT,

      /* VkPhysicalDeviceMultiDrawPropertiesEXT */
      .maxMultiDrawCount = 2048,

      /* VkPhysicalDevicePCIBusInfoPropertiesEXT is not supported
       * and is left unfilled
       */

      /* VK_EXT_subgroup_size_control */
      .minSubgroupSize = V3D_CHANNELS,
      .maxSubgroupSize = V3D_CHANNELS,
      .maxComputeWorkgroupSubgroups = 16, /* 256 / 16 */
      .requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT,

      .subgroupSupportedOperations = subgroup_ops,

      /* VK_KHR_maintenance5 */
      .earlyFragmentMultisampleCoverageAfterSampleCounting = true,
      .earlyFragmentSampleMaskTestBeforeSampleCounting = true,
      .depthStencilSwizzleOneSupport = true,
      .polygonModePointSize = true,
      .nonStrictSinglePixelWideLinesUseParallelogram = true,
      .nonStrictWideLinesUseParallelogram = true,
   };

   /* VkPhysicalDeviceShaderModuleIdentifierPropertiesEXT */
   STATIC_ASSERT(sizeof(vk_shaderModuleIdentifierAlgorithmUUID) ==
                 sizeof(properties->shaderModuleIdentifierAlgorithmUUID));
   memcpy(properties->shaderModuleIdentifierAlgorithmUUID,
          vk_shaderModuleIdentifierAlgorithmUUID,
          sizeof(properties->shaderModuleIdentifierAlgorithmUUID));

   /* VkPhysicalDeviceProperties */
   snprintf(properties->deviceName, sizeof(properties->deviceName),
            "%s", device->name);
   memcpy(properties->pipelineCacheUUID,
          device->pipeline_cache_uuid, VK_UUID_SIZE);

   /* Vulkan 1.1 properties */
   memcpy(properties->deviceUUID, device->device_uuid, VK_UUID_SIZE);
   memcpy(properties->driverUUID, device->driver_uuid, VK_UUID_SIZE);

   /* Vulkan 1.2 properties */
   memset(properties->driverName, 0, VK_MAX_DRIVER_NAME_SIZE);
   snprintf(properties->driverName, VK_MAX_DRIVER_NAME_SIZE, "V3DV Mesa");
   memset(properties->driverInfo, 0, VK_MAX_DRIVER_INFO_SIZE);
   snprintf(properties->driverInfo, VK_MAX_DRIVER_INFO_SIZE,
            "Mesa " PACKAGE_VERSION MESA_GIT_SHA1);
}

static VkResult
create_physical_device(struct v3dv_instance *instance,
                       int32_t render_fd, int32_t primary_fd)
{
   VkResult result = VK_SUCCESS;

   struct v3dv_physical_device *device =
      vk_zalloc(&instance->vk.alloc, sizeof(*device), 8,
                VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);

   if (!device)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &v3dv_physical_device_entrypoints, true);
   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table, &wsi_physical_device_entrypoints, false);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, NULL,
                                    NULL, &dispatch_table);

   if (result != VK_SUCCESS)
      goto fail;

   struct stat primary_stat = {0}, render_stat = {0};

   device->has_primary = primary_fd >= 0;
   if (device->has_primary) {
      if (fstat(primary_fd, &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node");
         goto fail;
      }

      device->primary_devid = primary_stat.st_rdev;
   }

   if (fstat(render_fd, &render_stat) != 0) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "failed to stat DRM render node");
      goto fail;
   }

   device->has_render = true;
   device->render_devid = render_stat.st_rdev;

#if USE_V3D_SIMULATOR
   device->sim_file = v3d_simulator_init(render_fd);
#endif

   device->render_fd = render_fd;
   device->display_fd = primary_fd;

   if (!v3d_get_device_info(device->render_fd, &device->devinfo, &v3d_ioctl)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to get info from device.");
      goto fail;
   }

   if (device->devinfo.ver < 42) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Device version < 42.");
      goto fail;
   }

   device->caps.cpu_queue =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_CPU_QUEUE);

   device->caps.multisync =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_MULTISYNC_EXT);

   device->caps.perfmon =
      v3d_has_feature(device, DRM_V3D_PARAM_SUPPORTS_PERFMON);

   if (!device_has_expected_features(device)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                         "Kernel driver doesn't have required features.");
      goto fail;
   }

   if (device->caps.perfmon) {
      device->perfcntr = v3d_perfcntrs_init(&device->devinfo, device->render_fd);

      if (!device->perfcntr) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "Failed to init perfmon.");
         goto fail;
      }
   }

   result = init_uuids(device);
   if (result != VK_SUCCESS)
      goto fail;

   device->compiler = v3d_compiler_init(&device->devinfo,
                                        MAX_INLINE_UNIFORM_BUFFERS);
   device->next_program_id = 0;

   ASSERTED int len =
      asprintf(&device->name, "V3D %d.%d.%d.%d",
               device->devinfo.ver / 10,
               device->devinfo.ver % 10,
               device->devinfo.rev,
               device->devinfo.compat_rev);
   assert(len != -1);

   v3dv_physical_device_init_disk_cache(device);

   /* Setup available memory heaps and types */
   VkPhysicalDeviceMemoryProperties *mem = &device->memory;
   mem->memoryHeapCount = 1;
   mem->memoryHeaps[0].size = compute_heap_size();
   mem->memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   /* This is the only combination required by the spec */
   mem->memoryTypeCount = 1;
   mem->memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   mem->memoryTypes[0].heapIndex = 0;

   /* Initialize sparse array for refcounting imported BOs */
   util_sparse_array_init(&device->bo_map, sizeof(struct v3dv_bo), 512);

   device->options.merge_jobs = !V3D_DBG(NO_MERGE_JOBS);

   device->drm_syncobj_type = vk_drm_syncobj_get_type(device->render_fd);

   /* We don't support timelines in the uAPI yet and we don't want it getting
    * suddenly turned on by vk_drm_syncobj_get_type() without us adding v3dv
    * code for it first.
    */
   device->drm_syncobj_type.features &= ~VK_SYNC_FEATURE_TIMELINE;

   /* Multiwait is required for emulated timeline semaphores and is supported
    * by the v3d kernel interface.
    */
   device->drm_syncobj_type.features |= VK_SYNC_FEATURE_GPU_MULTI_WAIT;

   device->sync_timeline_type =
      vk_sync_timeline_get_type(&device->drm_syncobj_type);

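   /* NULL-terminated list of sync types consumed by the common Vulkan
    * runtime: the native DRM syncobj type first, with the emulated timeline
    * type as a fallback.
    */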
   device->sync_types[0] = &device->drm_syncobj_type;
   device->sync_types[1] = &device->sync_timeline_type.sync;
   device->sync_types[2] = NULL;
   device->vk.supported_sync_types = device->sync_types;

   get_device_extensions(device, &device->vk.supported_extensions);
   get_features(device, &device->vk.supported_features);
   get_device_properties(device, &device->vk.properties);

   result = v3dv_wsi_init(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail;
   }

   mtx_init(&device->mutex, mtx_plain);

   list_addtail(&device->vk.link, &instance->vk.physical_devices.list);

   return VK_SUCCESS;

fail:
   vk_physical_device_finish(&device->vk);
   vk_free(&instance->vk.alloc, device);

   if (render_fd >= 0)
      close(render_fd);
   if (primary_fd >= 0)
      close(primary_fd);

   return result;
}

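/* Opens the DRM node at path and, when target is non-NULL, checks that the
 * kernel driver name matches it (e.g. "v3d"). On mismatch or error the fd
 * is closed and reset to -1.
 */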
1447 static bool
try_device(const char * path,int * fd,const char * target)1448 try_device(const char *path, int *fd, const char *target)
1449 {
1450 drmVersionPtr version = NULL;
1451
1452 *fd = open(path, O_RDWR | O_CLOEXEC);
1453 if (*fd < 0) {
1454 mesa_loge("Opening %s failed: %s\n", path, strerror(errno));
1455 return false;
1456 }
1457
1458 if (!target)
1459 return true;
1460
1461 version = drmGetVersion(*fd);
1462 if (!version) {
1463 mesa_loge("Retrieving device version failed: %s\n", strerror(errno));
1464 goto fail;
1465 }
1466
1467 if (strcmp(version->name, target) != 0)
1468 goto fail;
1469
1470 drmFreeVersion(version);
1471 return true;
1472
1473 fail:
1474 drmFreeVersion(version);
1475 close(*fd);
1476 *fd = -1;
1477 return false;
1478 }
1479
1480 static void
try_display_device(struct v3dv_instance * instance,const char * path,int32_t * fd)1481 try_display_device(struct v3dv_instance *instance, const char *path,
1482 int32_t *fd)
1483 {
1484 bool khr_display = instance->vk.enabled_extensions.KHR_display ||
1485 instance->vk.enabled_extensions.EXT_acquire_drm_display;
1486 *fd = open(path, O_RDWR | O_CLOEXEC);
1487 if (*fd < 0) {
1488 mesa_loge("Opening %s failed: %s\n", path, strerror(errno));
1489 return;
1490 }
1491
1492 /* The display driver must have KMS capabilities */
1493 if (!drmIsKMS(*fd))
1494 goto fail;
1495
1496    /* If using VK_KHR_display, we require the fd to have a connected output.
1497     * We need this strategy because Raspberry Pi 5 can load different
1498     * drivers for different types of connectors, so the device with a
1499     * connected output may not be vc4, which on Raspberry Pi 5, unlike on
1500     * Raspberry Pi 4, doesn't drive the DSI output, for example.
1501     */
1502 if (!khr_display) {
1503 if (instance->vk.enabled_extensions.KHR_xcb_surface ||
1504 instance->vk.enabled_extensions.KHR_xlib_surface ||
1505 instance->vk.enabled_extensions.KHR_wayland_surface)
1506 return;
1507 else
1508 goto fail;
1509 }
1510
1511 /* If the display device isn't the DRM master, we can't get its resources */
1512 if (!drmIsMaster(*fd))
1513 goto fail;
1514
1515 drmModeResPtr mode_res = drmModeGetResources(*fd);
1516 if (!mode_res) {
1517 mesa_loge("Failed to get DRM mode resources: %s\n", strerror(errno));
1518 goto fail;
1519 }
1520
1521 drmModeConnection connection = DRM_MODE_DISCONNECTED;
1522
1523 /* Only use a display device if there is at least one connected connector */
1524 for (int c = 0; c < mode_res->count_connectors && connection == DRM_MODE_DISCONNECTED; c++) {
1525 drmModeConnectorPtr connector = drmModeGetConnector(*fd, mode_res->connectors[c]);
1526
1527 if (!connector)
1528 continue;
1529
1530 connection = connector->connection;
1531 drmModeFreeConnector(connector);
1532 }
1533
1534 drmModeFreeResources(mode_res);
1535
1536 if (connection == DRM_MODE_DISCONNECTED)
1537 goto fail;
1538
1539 return;
1540
1541 fail:
1542 close(*fd);
1543 *fd = -1;
1544 }
1545
1546 /* This driver hook is expected to return VK_SUCCESS (unless a memory
1547 * allocation error happened) if no compatible device is found. If a
1548 * compatible device is found, it may return an error code if device
1549 * initialization failed.
1550 */
1551 static VkResult
1552 enumerate_devices(struct vk_instance *vk_instance)
1553 {
1554 struct v3dv_instance *instance =
1555 container_of(vk_instance, struct v3dv_instance, vk);
1556
1557 /* FIXME: Check for more devices? */
1558 drmDevicePtr devices[8];
1559 int max_devices;
1560
1561 max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));
1562 if (max_devices < 1)
1563 return VK_SUCCESS;
1564
1565 VkResult result = VK_SUCCESS;
1566
1567 int32_t render_fd = -1;
1568 int32_t primary_fd = -1;
1569 for (unsigned i = 0; i < (unsigned)max_devices; i++) {
1570 #if USE_V3D_SIMULATOR
1571 /* In the simulator, we look for an Intel/AMD render node */
1572 const int required_nodes = (1 << DRM_NODE_RENDER) | (1 << DRM_NODE_PRIMARY);
1573 if ((devices[i]->available_nodes & required_nodes) == required_nodes &&
1574 devices[i]->bustype == DRM_BUS_PCI &&
1575 (devices[i]->deviceinfo.pci->vendor_id == 0x8086 ||
1576 devices[i]->deviceinfo.pci->vendor_id == 0x1002)) {
1577 if (try_device(devices[i]->nodes[DRM_NODE_RENDER], &render_fd, NULL))
1578 try_device(devices[i]->nodes[DRM_NODE_PRIMARY], &primary_fd, NULL);
1579 }
1580 #else
1581       /* On actual hardware, we should have a GPU device (v3d) and a display
1582        * device. We need to use the display device to allocate WSI buffers,
1583        * which we then share with the render node via PRIME. We allocate on
1584        * the display device because it may not have an MMU (this is true at
1585        * least on Raspberry Pi 4).
1586        */
1587 if (devices[i]->bustype != DRM_BUS_PLATFORM)
1588 continue;
1589
1590 if ((devices[i]->available_nodes & 1 << DRM_NODE_RENDER))
1591 try_device(devices[i]->nodes[DRM_NODE_RENDER], &render_fd, "v3d");
1592 if ((devices[i]->available_nodes & 1 << DRM_NODE_PRIMARY))
1593 try_display_device(instance, devices[i]->nodes[DRM_NODE_PRIMARY], &primary_fd);
1594 #endif
1595
1596 if (render_fd >= 0 && primary_fd >= 0)
1597 break;
1598 }
1599
1600 if (render_fd < 0)
1601 result = VK_ERROR_INCOMPATIBLE_DRIVER;
1602 else
1603 result = create_physical_device(instance, render_fd, primary_fd);
1604
1605 drmFreeDevices(devices, max_devices);
1606
1607 return result;
1608 }
1609
1610 uint32_t
1611 v3dv_physical_device_vendor_id(const struct v3dv_physical_device *dev)
1612 {
1613 return 0x14E4; /* Broadcom */
1614 }
1615
1616 uint32_t
1617 v3dv_physical_device_device_id(const struct v3dv_physical_device *dev)
1618 {
1619 switch (dev->devinfo.ver) {
1620 case 42:
1621 return 0xBE485FD3; /* Broadcom deviceID for 2711 */
1622 case 71:
1623 return 0x55701C33; /* Broadcom deviceID for 2712 */
1624 default:
1625 unreachable("Unsupported V3D version");
1626 }
1627 }
1628
1629 /* We support exactly one queue family. */
1630 static const VkQueueFamilyProperties
1631 v3dv_queue_family_properties = {
1632 .queueFlags = VK_QUEUE_GRAPHICS_BIT |
1633 VK_QUEUE_COMPUTE_BIT |
1634 VK_QUEUE_TRANSFER_BIT,
1635 .queueCount = 1,
1636 .timestampValidBits = 64,
1637 .minImageTransferGranularity = { 1, 1, 1 },
1638 };
1639
1640 VKAPI_ATTR void VKAPI_CALL
1641 v3dv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice,
1642 uint32_t *pQueueFamilyPropertyCount,
1643 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1644 {
1645 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2, out,
1646 pQueueFamilyProperties, pQueueFamilyPropertyCount);
1647
1648 vk_outarray_append_typed(VkQueueFamilyProperties2, &out, p) {
1649 p->queueFamilyProperties = v3dv_queue_family_properties;
1650
1651 vk_foreach_struct(s, p->pNext) {
1652 vk_debug_ignored_stype(s->sType);
1653 }
1654 }
1655 }
1656
1657 VKAPI_ATTR void VKAPI_CALL
1658 v3dv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
1659 VkPhysicalDeviceMemoryProperties *pMemoryProperties)
1660 {
1661 V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
1662 *pMemoryProperties = device->memory;
1663 }
1664
1665 VKAPI_ATTR void VKAPI_CALL
1666 v3dv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
1667 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1668 {
1669 V3DV_FROM_HANDLE(v3dv_physical_device, device, physicalDevice);
1670
1671 v3dv_GetPhysicalDeviceMemoryProperties(physicalDevice,
1672 &pMemoryProperties->memoryProperties);
1673
1674 vk_foreach_struct(ext, pMemoryProperties->pNext) {
1675 switch (ext->sType) {
1676 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1677 VkPhysicalDeviceMemoryBudgetPropertiesEXT *p =
1678 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *) ext;
1679 p->heapUsage[0] = device->heap_used;
1680 p->heapBudget[0] = compute_memory_budget(device);
1681
1682 /* The heapBudget and heapUsage values must be zero for array elements
1683 * greater than or equal to VkPhysicalDeviceMemoryProperties::memoryHeapCount
1684 */
1685 for (unsigned i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1686 p->heapBudget[i] = 0u;
1687 p->heapUsage[i] = 0u;
1688 }
1689 break;
1690 }
1691 default:
1692 vk_debug_ignored_stype(ext->sType);
1693 break;
1694 }
1695 }
1696 }
1697
1698 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1699 v3dv_GetInstanceProcAddr(VkInstance _instance,
1700 const char *pName)
1701 {
1702 V3DV_FROM_HANDLE(v3dv_instance, instance, _instance);
1703 return vk_instance_get_proc_addr(instance ? &instance->vk : NULL,
1704 &v3dv_instance_entrypoints,
1705 pName);
1706 }
1707
1708 /* With version 1+ of the loader interface the ICD should expose
1709 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
1710 */
1711 PUBLIC
1712 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1713 vk_icdGetInstanceProcAddr(VkInstance instance,
1714 const char* pName)
1715 {
1716 return v3dv_GetInstanceProcAddr(instance, pName);
1717 }
1718
1719 VKAPI_ATTR VkResult VKAPI_CALL
1720 v3dv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
1721 VkLayerProperties *pProperties)
1722 {
1723 if (pProperties == NULL) {
1724 *pPropertyCount = 0;
1725 return VK_SUCCESS;
1726 }
1727
1728 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
1729 }
1730
1731 VKAPI_ATTR VkResult VKAPI_CALL
1732 v3dv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice,
1733 uint32_t *pPropertyCount,
1734 VkLayerProperties *pProperties)
1735 {
1736 V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
1737
1738 if (pProperties == NULL) {
1739 *pPropertyCount = 0;
1740 return VK_SUCCESS;
1741 }
1742
1743 return vk_error(physical_device, VK_ERROR_LAYER_NOT_PRESENT);
1744 }
1745
1746 static void
1747 destroy_queue_syncs(struct v3dv_queue *queue)
1748 {
1749 for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
1750 if (queue->last_job_syncs.syncs[i]) {
1751 drmSyncobjDestroy(queue->device->pdevice->render_fd,
1752 queue->last_job_syncs.syncs[i]);
1753 }
1754 }
1755 }
1756
1757 static VkResult
1758 queue_init(struct v3dv_device *device, struct v3dv_queue *queue,
1759 const VkDeviceQueueCreateInfo *create_info,
1760 uint32_t index_in_family)
1761 {
1762 VkResult result = vk_queue_init(&queue->vk, &device->vk, create_info,
1763 index_in_family);
1764 if (result != VK_SUCCESS)
1765 return result;
1766
1767 result = vk_queue_enable_submit_thread(&queue->vk);
1768 if (result != VK_SUCCESS)
1769 goto fail_submit_thread;
1770
1771 queue->device = device;
1772 queue->vk.driver_submit = v3dv_queue_driver_submit;
1773
1774 for (int i = 0; i < V3DV_QUEUE_COUNT; i++) {
1775 queue->last_job_syncs.first[i] = true;
1776 int ret = drmSyncobjCreate(device->pdevice->render_fd,
1777 DRM_SYNCOBJ_CREATE_SIGNALED,
1778 &queue->last_job_syncs.syncs[i]);
1779 if (ret) {
1780 result = vk_errorf(device, VK_ERROR_INITIALIZATION_FAILED,
1781 "syncobj create failed: %m");
1782 goto fail_last_job_syncs;
1783 }
1784 }
1785
1786 queue->noop_job = NULL;
1787 return VK_SUCCESS;
1788
1789 fail_last_job_syncs:
1790 destroy_queue_syncs(queue);
1791 fail_submit_thread:
1792 vk_queue_finish(&queue->vk);
1793 return result;
1794 }
1795
1796 static void
1797 queue_finish(struct v3dv_queue *queue)
1798 {
1799 if (queue->noop_job)
1800 v3dv_job_destroy(queue->noop_job);
1801 destroy_queue_syncs(queue);
1802 vk_queue_finish(&queue->vk);
1803 }
1804
1805 static void
1806 init_device_meta(struct v3dv_device *device)
1807 {
1808 mtx_init(&device->meta.mtx, mtx_plain);
1809 v3dv_meta_clear_init(device);
1810 v3dv_meta_blit_init(device);
1811 v3dv_meta_texel_buffer_copy_init(device);
1812 }
1813
1814 static void
1815 destroy_device_meta(struct v3dv_device *device)
1816 {
1817 mtx_destroy(&device->meta.mtx);
1818 v3dv_meta_clear_finish(device);
1819 v3dv_meta_blit_finish(device);
1820 v3dv_meta_texel_buffer_copy_finish(device);
1821 }
1822
1823 VKAPI_ATTR VkResult VKAPI_CALL
1824 v3dv_CreateDevice(VkPhysicalDevice physicalDevice,
1825 const VkDeviceCreateInfo *pCreateInfo,
1826 const VkAllocationCallbacks *pAllocator,
1827 VkDevice *pDevice)
1828 {
1829 V3DV_FROM_HANDLE(v3dv_physical_device, physical_device, physicalDevice);
1830 struct v3dv_instance *instance = (struct v3dv_instance*) physical_device->vk.instance;
1831 VkResult result;
1832 struct v3dv_device *device;
1833
1834 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1835
1836    /* Check requested queues (we only expose one queue) */
1837 assert(pCreateInfo->queueCreateInfoCount == 1);
1838 for (uint32_t i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
1839 assert(pCreateInfo->pQueueCreateInfos[i].queueFamilyIndex == 0);
1840 assert(pCreateInfo->pQueueCreateInfos[i].queueCount == 1);
1841 if (pCreateInfo->pQueueCreateInfos[i].flags != 0)
1842 return vk_error(instance, VK_ERROR_INITIALIZATION_FAILED);
1843 }
1844
1845 device = vk_zalloc2(&physical_device->vk.instance->alloc, pAllocator,
1846 sizeof(*device), 8,
1847 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1848 if (!device)
1849 return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1850
1851 struct vk_device_dispatch_table dispatch_table;
1852 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1853 &v3dv_device_entrypoints, true);
1854 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1855 &wsi_device_entrypoints, false);
1856 result = vk_device_init(&device->vk, &physical_device->vk,
1857 &dispatch_table, pCreateInfo, pAllocator);
1858 if (result != VK_SUCCESS) {
1859 vk_free(&device->vk.alloc, device);
1860 return vk_error(NULL, result);
1861 }
1862
1863 device->instance = instance;
1864 device->pdevice = physical_device;
1865
1866 mtx_init(&device->query_mutex, mtx_plain);
1867 cnd_init(&device->query_ended);
1868
1869 device->vk.command_buffer_ops = &v3dv_cmd_buffer_ops;
1870
1871 vk_device_set_drm_fd(&device->vk, physical_device->render_fd);
1872 vk_device_enable_threaded_submit(&device->vk);
1873
1874 result = queue_init(device, &device->queue,
1875 pCreateInfo->pQueueCreateInfos, 0);
1876 if (result != VK_SUCCESS)
1877 goto fail;
1878
1879 device->devinfo = physical_device->devinfo;
1880
1881 if (device->vk.enabled_features.robustBufferAccess)
1882 perf_debug("Device created with Robust Buffer Access enabled.\n");
1883
1884 if (device->vk.enabled_features.robustImageAccess)
1885 perf_debug("Device created with Robust Image Access enabled.\n");
1886
1888 #if MESA_DEBUG
1889 v3d_X((&device->devinfo), device_check_prepacked_sizes)();
1890 #endif
1891 init_device_meta(device);
1892 v3dv_bo_cache_init(device);
1893 v3dv_pipeline_cache_init(&device->default_pipeline_cache, device, 0,
1894 device->instance->default_pipeline_cache_enabled);
1895 device->default_attribute_float =
1896 v3d_X((&device->devinfo), create_default_attribute_values)(device, NULL);
1897
1898 device->device_address_mem_ctx = ralloc_context(NULL);
1899 util_dynarray_init(&device->device_address_bo_list,
1900 device->device_address_mem_ctx);
1901
1902 mtx_init(&device->events.lock, mtx_plain);
1903 result = v3dv_event_allocate_resources(device);
1904 if (result != VK_SUCCESS)
1905 goto fail;
1906
1907 if (list_is_empty(&device->events.free_list)) {
1908 result = vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
1909 goto fail;
1910 }
1911
1912 result = v3dv_query_allocate_resources(device);
1913 if (result != VK_SUCCESS)
1914 goto fail;
1915
1916 *pDevice = v3dv_device_to_handle(device);
1917
1918 return VK_SUCCESS;
1919
1920 fail:
1921 cnd_destroy(&device->query_ended);
1922 mtx_destroy(&device->query_mutex);
1923 queue_finish(&device->queue);
1924 destroy_device_meta(device);
1925 v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
1926 v3dv_event_free_resources(device);
1927 v3dv_query_free_resources(device);
1928 vk_device_finish(&device->vk);
1929 vk_free(&device->vk.alloc, device);
1930
1931 return result;
1932 }
1933
1934 VKAPI_ATTR void VKAPI_CALL
1935 v3dv_DestroyDevice(VkDevice _device,
1936 const VkAllocationCallbacks *pAllocator)
1937 {
1938 V3DV_FROM_HANDLE(v3dv_device, device, _device);
1939
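   /* Make sure all in-flight work has completed before we start tearing
    * down queues and freeing resources.
    */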
1940 device->vk.dispatch_table.DeviceWaitIdle(_device);
1941 queue_finish(&device->queue);
1942
1943 v3dv_event_free_resources(device);
1944 mtx_destroy(&device->events.lock);
1945
1946 v3dv_query_free_resources(device);
1947
1948 destroy_device_meta(device);
1949 v3dv_pipeline_cache_finish(&device->default_pipeline_cache);
1950
1951 if (device->default_attribute_float) {
1952 v3dv_bo_free(device, device->default_attribute_float);
1953 device->default_attribute_float = NULL;
1954 }
1955
1956 ralloc_free(device->device_address_mem_ctx);
1957
1958    /* The BO cache must be destroyed last, as any other object could be
1959     * freeing its private BOs.
1960     */
1961 v3dv_bo_cache_destroy(device);
1962
1963 cnd_destroy(&device->query_ended);
1964 mtx_destroy(&device->query_mutex);
1965
1966 vk_device_finish(&device->vk);
1967 vk_free2(&device->vk.alloc, pAllocator, device);
1968 }
1969
1970 static VkResult
1971 device_alloc(struct v3dv_device *device,
1972 struct v3dv_device_memory *mem,
1973 VkDeviceSize size)
1974 {
1975 /* Our kernel interface is 32-bit */
1976 assert(size <= UINT32_MAX);
1977
1978 mem->bo = v3dv_bo_alloc(device, size, "device_alloc", false);
1979 if (!mem->bo)
1980 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
1981
1982 return VK_SUCCESS;
1983 }
1984
1985 static void
1986 device_free_wsi_dumb(int32_t display_fd, int32_t dumb_handle)
1987 {
1988 assert(display_fd != -1);
1989 if (dumb_handle < 0)
1990 return;
1991
1992 struct drm_mode_destroy_dumb destroy_dumb = {
1993 .handle = dumb_handle,
1994 };
1995 if (v3d_ioctl(display_fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_dumb)) {
1996 mesa_loge("destroy dumb object %d: %s\n", dumb_handle, strerror(errno));
1997 }
1998 }
1999
2000 static void
2001 device_free(struct v3dv_device *device, struct v3dv_device_memory *mem)
2002 {
2003 /* If this memory allocation was for WSI, then we need to use the
2004 * display device to free the allocated dumb BO.
2005 */
2006 if (mem->is_for_wsi) {
2007 device_free_wsi_dumb(device->pdevice->display_fd, mem->bo->dumb_handle);
2008 }
2009
2010 p_atomic_add(&device->pdevice->heap_used, -((int64_t)mem->bo->size));
2011
2012 v3dv_bo_free(device, mem->bo);
2013 }
2014
2015 static void
2016 device_unmap(struct v3dv_device *device, struct v3dv_device_memory *mem)
2017 {
2018 assert(mem && mem->bo->map && mem->bo->map_size > 0);
2019 v3dv_bo_unmap(device, mem->bo);
2020 }
2021
2022 static VkResult
2023 device_map(struct v3dv_device *device, struct v3dv_device_memory *mem)
2024 {
2025 assert(mem && mem->bo);
2026
2027 /* From the spec:
2028 *
2029 * "After a successful call to vkMapMemory the memory object memory is
2030 * considered to be currently host mapped. It is an application error to
2031 * call vkMapMemory on a memory object that is already host mapped."
2032 *
2033    * We are not concerned with this ourselves (validation layers should
2034    * catch these errors and warn users), however, the driver may internally
2035    * map things (for example for debug CLIF dumps or some CPU-side
2036    * operations), so by the time the user calls here the buffer might
2037    * already have been mapped internally by the driver.
2038    */
2039 if (mem->bo->map) {
2040 assert(mem->bo->map_size == mem->bo->size);
2041 return VK_SUCCESS;
2042 }
2043
2044 bool ok = v3dv_bo_map(device, mem->bo, mem->bo->size);
2045 if (!ok)
2046 return VK_ERROR_MEMORY_MAP_FAILED;
2047
2048 return VK_SUCCESS;
2049 }
2050
2051 static VkResult
2052 device_import_bo(struct v3dv_device *device,
2053 const VkAllocationCallbacks *pAllocator,
2054 int fd, uint64_t size,
2055 struct v3dv_bo **bo)
2056 {
2057 *bo = NULL;
2058
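   /* A dma-buf fd supports lseek(SEEK_END) to query the size of its backing
    * storage, which must be at least as large as the requested allocation.
    */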
2059 off_t real_size = lseek(fd, 0, SEEK_END);
2060 lseek(fd, 0, SEEK_SET);
2061 if (real_size < 0 || (uint64_t) real_size < size)
2062 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2063
2064 int render_fd = device->pdevice->render_fd;
2065 assert(render_fd >= 0);
2066
2067 int ret;
2068 uint32_t handle;
2069 ret = drmPrimeFDToHandle(render_fd, fd, &handle);
2070 if (ret)
2071 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2072
2073 struct drm_v3d_get_bo_offset get_offset = {
2074 .handle = handle,
2075 };
2076 ret = v3d_ioctl(render_fd, DRM_IOCTL_V3D_GET_BO_OFFSET, &get_offset);
2077 if (ret)
2078 return VK_ERROR_INVALID_EXTERNAL_HANDLE;
2079 assert(get_offset.offset != 0);
2080
2081 *bo = v3dv_device_lookup_bo(device->pdevice, handle);
2082 assert(*bo);
2083
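   /* Importing the same underlying buffer through drmPrimeFDToHandle yields
    * the same GEM handle on a given device fd, so if the BO is already known
    * we only need to bump its reference count.
    */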
2084 if ((*bo)->refcnt == 0)
2085 v3dv_bo_init_import(*bo, handle, size, get_offset.offset, false);
2086 else
2087 p_atomic_inc(&(*bo)->refcnt);
2088
2089 return VK_SUCCESS;
2090 }
2091
2092 static VkResult
2093 device_alloc_for_wsi(struct v3dv_device *device,
2094 const VkAllocationCallbacks *pAllocator,
2095 struct v3dv_device_memory *mem,
2096 VkDeviceSize size)
2097 {
2098 /* In the simulator we can get away with a regular allocation since both
2099 * allocation and rendering happen in the same DRM render node. On actual
2100 * hardware we need to allocate our winsys BOs on the vc4 display device
2101 * and import them into v3d.
2102 */
2103 #if USE_V3D_SIMULATOR
2104 return device_alloc(device, mem, size);
2105 #else
2106 VkResult result;
2107 struct v3dv_physical_device *pdevice = device->pdevice;
2108 assert(pdevice->display_fd != -1);
2109
2110 mem->is_for_wsi = true;
2111
2112 int display_fd = pdevice->display_fd;
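   /* Describe the allocation as a dumb buffer: a 1024-pixel row at 32 bpp is
    * exactly 4096 bytes (one page), so the requested size in pages becomes
    * the height.
    */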
2113 struct drm_mode_create_dumb create_dumb = {
2114 .width = 1024, /* one page */
2115 .height = align(size, 4096) / 4096,
2116 .bpp = util_format_get_blocksizebits(PIPE_FORMAT_RGBA8888_UNORM),
2117 };
2118
2119 int err;
2120 err = v3d_ioctl(display_fd, DRM_IOCTL_MODE_CREATE_DUMB, &create_dumb);
2121 if (err < 0)
2122 goto fail_create;
2123
2124 int fd;
2125 err =
2126 drmPrimeHandleToFD(display_fd, create_dumb.handle, O_CLOEXEC, &fd);
2127 if (err < 0)
2128 goto fail_export;
2129
2130 result = device_import_bo(device, pAllocator, fd, size, &mem->bo);
2131 close(fd);
2132 if (result != VK_SUCCESS)
2133 goto fail_import;
2134
2135 mem->bo->dumb_handle = create_dumb.handle;
2136 return VK_SUCCESS;
2137
2138 fail_import:
2139 fail_export:
2140 device_free_wsi_dumb(display_fd, create_dumb.handle);
2141
2142 fail_create:
2143 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2144 #endif
2145 }
2146
2147 static void
2148 device_add_device_address_bo(struct v3dv_device *device,
2149 struct v3dv_bo *bo)
2150 {
2151 util_dynarray_append(&device->device_address_bo_list,
2152 struct v3dv_bo *,
2153 bo);
2154 }
2155
2156 static void
2157 device_remove_device_address_bo(struct v3dv_device *device,
2158 struct v3dv_bo *bo)
2159 {
2160 util_dynarray_delete_unordered(&device->device_address_bo_list,
2161 struct v3dv_bo *,
2162 bo);
2163 }
2164
2165 static void
2166 free_memory(struct v3dv_device *device,
2167 struct v3dv_device_memory *mem,
2168 const VkAllocationCallbacks *pAllocator)
2169 {
2170 if (mem == NULL)
2171 return;
2172
2173 if (mem->bo->map)
2174 device_unmap(device, mem);
2175
2176 if (mem->is_for_device_address)
2177 device_remove_device_address_bo(device, mem->bo);
2178
2179 device_free(device, mem);
2180
2181 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2182 }
2183
2184 VKAPI_ATTR void VKAPI_CALL
2185 v3dv_FreeMemory(VkDevice _device,
2186 VkDeviceMemory _mem,
2187 const VkAllocationCallbacks *pAllocator)
2188 {
2189 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2190 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _mem);
2191 free_memory(device, mem, pAllocator);
2192 }
2193
2194 VKAPI_ATTR VkResult VKAPI_CALL
2195 v3dv_AllocateMemory(VkDevice _device,
2196 const VkMemoryAllocateInfo *pAllocateInfo,
2197 const VkAllocationCallbacks *pAllocator,
2198 VkDeviceMemory *pMem)
2199 {
2200 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2201 struct v3dv_device_memory *mem;
2202 struct v3dv_physical_device *pdevice = device->pdevice;
2203
2204 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2205
2206    /* We always allocate device memory in multiples of a page, so round up
2207     * the requested size accordingly.
2208     */
2209 const VkDeviceSize alloc_size = align64(pAllocateInfo->allocationSize, 4096);
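   /* For example, a 100-byte allocation request still consumes a full
    * 4096-byte page of device memory.
    */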
2210
2211 if (unlikely(alloc_size > MAX_MEMORY_ALLOCATION_SIZE))
2212 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2213
2214 uint64_t heap_used = p_atomic_read(&pdevice->heap_used);
2215 if (unlikely(heap_used + alloc_size > pdevice->memory.memoryHeaps[0].size))
2216 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2217
2218 mem = vk_device_memory_create(&device->vk, pAllocateInfo,
2219 pAllocator, sizeof(*mem));
2220 if (mem == NULL)
2221 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
2222
2223 assert(pAllocateInfo->memoryTypeIndex < pdevice->memory.memoryTypeCount);
2224 mem->type = &pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex];
2225 mem->is_for_wsi = false;
2226
2227 const struct wsi_memory_allocate_info *wsi_info = NULL;
2228 const VkImportMemoryFdInfoKHR *fd_info = NULL;
2229 const VkMemoryAllocateFlagsInfo *flags_info = NULL;
2230 vk_foreach_struct_const(ext, pAllocateInfo->pNext) {
2231 switch ((unsigned)ext->sType) {
2232 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
2233 wsi_info = (void *)ext;
2234 break;
2235 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
2236 fd_info = (void *)ext;
2237 break;
2238 case VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_FLAGS_INFO:
2239 flags_info = (void *)ext;
2240 break;
2241 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_ALLOCATE_INFO:
2242 /* We don't have particular optimizations associated with memory
2243 * allocations that won't be suballocated to multiple resources.
2244 */
2245 break;
2246 case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
2247 /* The mask of handle types specified here must be supported
2248 * according to VkExternalImageFormatProperties, so it must be
2249 * fd or dmabuf, which don't have special requirements for us.
2250 */
2251 break;
2252 case VK_STRUCTURE_TYPE_IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID:
2253 /* This case is handled in the common code */
2254 break;
2255 default:
2256 vk_debug_ignored_stype(ext->sType);
2257 break;
2258 }
2259 }
2260
2261 VkResult result;
2262
2263 if (wsi_info) {
2264 result = device_alloc_for_wsi(device, pAllocator, mem, alloc_size);
2265 } else if (fd_info && fd_info->handleType) {
2266 assert(fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2267 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2268 result = device_import_bo(device, pAllocator,
2269 fd_info->fd, alloc_size, &mem->bo);
2270 if (result == VK_SUCCESS)
2271 close(fd_info->fd);
2272 } else if (mem->vk.ahardware_buffer) {
2273 #if DETECT_OS_ANDROID
2274 const native_handle_t *handle = AHardwareBuffer_getNativeHandle(mem->vk.ahardware_buffer);
2275 assert(handle->numFds > 0);
2276 size_t size = lseek(handle->data[0], 0, SEEK_END);
2277 result = device_import_bo(device, pAllocator,
2278 handle->data[0], size, &mem->bo);
2279 #else
2280 result = VK_ERROR_FEATURE_NOT_PRESENT;
2281 #endif
2282 } else {
2283 result = device_alloc(device, mem, alloc_size);
2284 }
2285
2286 if (result != VK_SUCCESS) {
2287 vk_device_memory_destroy(&device->vk, pAllocator, &mem->vk);
2288 return vk_error(device, result);
2289 }
2290
2291 heap_used = p_atomic_add_return(&pdevice->heap_used, mem->bo->size);
2292 if (heap_used > pdevice->memory.memoryHeaps[0].size) {
2293 free_memory(device, mem, pAllocator);
2294 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2295 }
2296
2297    /* If this memory can be used via VK_KHR_buffer_device_address then we
2298     * need to manually add its BO to any job submission that uses
2299     * VK_KHR_buffer_device_address, since such jobs may issue buffer
2300     * load/store operations against any buffer memory allocated with this
2301     * flag, and we have no way to tell which buffers will be accessed
2302     * through this mechanism: they don't even have to be bound through
2303     * descriptor state.
2304     */
2305 if (flags_info &&
2306 (flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_BIT)) {
2307 mem->is_for_device_address = true;
2308 device_add_device_address_bo(device, mem->bo);
2309 }
2310
2311 *pMem = v3dv_device_memory_to_handle(mem);
2312 return result;
2313 }
2314
2315 VKAPI_ATTR VkResult VKAPI_CALL
2316 v3dv_MapMemory(VkDevice _device,
2317 VkDeviceMemory _memory,
2318 VkDeviceSize offset,
2319 VkDeviceSize size,
2320 VkMemoryMapFlags flags,
2321 void **ppData)
2322 {
2323 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2324 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
2325
2326 if (mem == NULL) {
2327 *ppData = NULL;
2328 return VK_SUCCESS;
2329 }
2330
2331 assert(offset < mem->bo->size);
2332
2333 /* Since the driver can map BOs internally as well and the mapped range
2334 * required by the user or the driver might not be the same, we always map
2335 * the entire BO and then add the requested offset to the start address
2336 * of the mapped region.
2337 */
2338 VkResult result = device_map(device, mem);
2339 if (result != VK_SUCCESS)
2340 return vk_error(device, result);
2341
2342 *ppData = ((uint8_t *) mem->bo->map) + offset;
2343 return VK_SUCCESS;
2344 }
2345
2346 VKAPI_ATTR void VKAPI_CALL
2347 v3dv_UnmapMemory(VkDevice _device,
2348 VkDeviceMemory _memory)
2349 {
2350 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2351 V3DV_FROM_HANDLE(v3dv_device_memory, mem, _memory);
2352
2353 if (mem == NULL)
2354 return;
2355
2356 device_unmap(device, mem);
2357 }
2358
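/* We only expose a single memory type and it is both host-visible and
 * host-coherent, so mapped-range flushes and invalidations are no-ops.
 */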
2359 VKAPI_ATTR VkResult VKAPI_CALL
2360 v3dv_FlushMappedMemoryRanges(VkDevice _device,
2361 uint32_t memoryRangeCount,
2362 const VkMappedMemoryRange *pMemoryRanges)
2363 {
2364 return VK_SUCCESS;
2365 }
2366
2367 VKAPI_ATTR VkResult VKAPI_CALL
2368 v3dv_InvalidateMappedMemoryRanges(VkDevice _device,
2369 uint32_t memoryRangeCount,
2370 const VkMappedMemoryRange *pMemoryRanges)
2371 {
2372 return VK_SUCCESS;
2373 }
2374
2375 static void
2376 get_image_memory_requirements(struct v3dv_image *image,
2377 VkImageAspectFlagBits planeAspect,
2378 VkMemoryRequirements2 *pMemoryRequirements)
2379 {
2380 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2381 .memoryTypeBits = 0x1,
2382 .alignment = image->planes[0].alignment,
2383 .size = image->non_disjoint_size
2384 };
2385
2386 if (planeAspect != VK_IMAGE_ASPECT_NONE) {
2387 assert(image->format->plane_count > 1);
2388 /* Disjoint images should have a 0 non_disjoint_size */
2389 assert(!pMemoryRequirements->memoryRequirements.size);
2390
2391 uint8_t plane = v3dv_image_aspect_to_plane(image, planeAspect);
2392
2393 VkMemoryRequirements *mem_reqs =
2394 &pMemoryRequirements->memoryRequirements;
2395 mem_reqs->alignment = image->planes[plane].alignment;
2396 mem_reqs->size = image->planes[plane].size;
2397 }
2398
2399 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2400 switch (ext->sType) {
2401 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2402 VkMemoryDedicatedRequirements *req =
2403 (VkMemoryDedicatedRequirements *) ext;
2404 req->requiresDedicatedAllocation = image->vk.external_handle_types != 0;
2405 req->prefersDedicatedAllocation = image->vk.external_handle_types != 0;
2406 break;
2407 }
2408 default:
2409 vk_debug_ignored_stype(ext->sType);
2410 break;
2411 }
2412 }
2413 }
2414
2415 VKAPI_ATTR void VKAPI_CALL
2416 v3dv_GetImageMemoryRequirements2(VkDevice device,
2417 const VkImageMemoryRequirementsInfo2 *pInfo,
2418 VkMemoryRequirements2 *pMemoryRequirements)
2419 {
2420 V3DV_FROM_HANDLE(v3dv_image, image, pInfo->image);
2421
2422 VkImageAspectFlagBits planeAspect = VK_IMAGE_ASPECT_NONE;
2423 vk_foreach_struct_const(ext, pInfo->pNext) {
2424 switch (ext->sType) {
2425 case VK_STRUCTURE_TYPE_IMAGE_PLANE_MEMORY_REQUIREMENTS_INFO: {
2426 VkImagePlaneMemoryRequirementsInfo *req =
2427 (VkImagePlaneMemoryRequirementsInfo *) ext;
2428 planeAspect = req->planeAspect;
2429 break;
2430 }
2431 default:
2432 vk_debug_ignored_stype(ext->sType);
2433 break;
2434 }
2435 }
2436
2437 get_image_memory_requirements(image, planeAspect, pMemoryRequirements);
2438 }
2439
2440 VKAPI_ATTR void VKAPI_CALL
2441 v3dv_GetDeviceImageMemoryRequirements(
2442 VkDevice _device,
2443 const VkDeviceImageMemoryRequirements *pInfo,
2444 VkMemoryRequirements2 *pMemoryRequirements)
2445 {
2446 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2447
2448 struct v3dv_image image = { 0 };
2449 vk_image_init(&device->vk, &image.vk, pInfo->pCreateInfo);
2450
2451 ASSERTED VkResult result =
2452 v3dv_image_init(device, pInfo->pCreateInfo, NULL, &image);
2453 assert(result == VK_SUCCESS);
2454
2455 /* From VkDeviceImageMemoryRequirements spec:
2456 *
2457 * " planeAspect is a VkImageAspectFlagBits value specifying the aspect
2458 * corresponding to the image plane to query. This parameter is ignored
2459 * unless pCreateInfo::tiling is
2460 * VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT, or pCreateInfo::flags has
2461 * VK_IMAGE_CREATE_DISJOINT_BIT set"
2462 *
2463    * We need to explicitly ignore planeAspect in the other cases, or the
2464    * asserts below could be triggered.
2465    */
2466 VkImageAspectFlagBits planeAspect =
2467 pInfo->pCreateInfo->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT ||
2468 pInfo->pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT ?
2469 pInfo->planeAspect : 0;
2470
2471 get_image_memory_requirements(&image, planeAspect, pMemoryRequirements);
2472 }
2473
2474 static void
2475 bind_image_memory(const VkBindImageMemoryInfo *info)
2476 {
2477 V3DV_FROM_HANDLE(v3dv_image, image, info->image);
2478 V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
2479
2480 /* Valid usage:
2481 *
2482 * "memoryOffset must be an integer multiple of the alignment member of
2483 * the VkMemoryRequirements structure returned from a call to
2484 * vkGetImageMemoryRequirements with image"
2485 */
2486 assert(info->memoryOffset < mem->bo->size);
2487
2488 uint64_t offset = info->memoryOffset;
2489 if (image->non_disjoint_size) {
2490       /* We only check plane 0, as it is the only one that actually starts
2491        * at that offset.
2492        */
2493 assert(offset % image->planes[0].alignment == 0);
2494 for (uint8_t plane = 0; plane < image->plane_count; plane++) {
2495 image->planes[plane].mem = mem;
2496 image->planes[plane].mem_offset = offset;
2497 }
2498 } else {
2499 const VkBindImagePlaneMemoryInfo *plane_mem_info =
2500 vk_find_struct_const(info->pNext, BIND_IMAGE_PLANE_MEMORY_INFO);
2501 assert(plane_mem_info);
2502
2503 /*
2504 * From VkBindImagePlaneMemoryInfo spec:
2505 *
2506 * "If the image’s tiling is VK_IMAGE_TILING_LINEAR or
2507 * VK_IMAGE_TILING_OPTIMAL, then planeAspect must be a single valid
2508 * format plane for the image"
2509 *
2510 * <skip>
2511 *
2512 * "If the image’s tiling is VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT,
2513 * then planeAspect must be a single valid memory plane for the
2514 * image"
2515 *
2516 * So planeAspect should only refer to one plane.
2517 */
2518 uint8_t plane = v3dv_plane_from_aspect(plane_mem_info->planeAspect);
2519 assert(offset % image->planes[plane].alignment == 0);
2520 image->planes[plane].mem = mem;
2521 image->planes[plane].mem_offset = offset;
2522 }
2523 }
2524
2525 VKAPI_ATTR VkResult VKAPI_CALL
2526 v3dv_BindImageMemory2(VkDevice _device,
2527 uint32_t bindInfoCount,
2528 const VkBindImageMemoryInfo *pBindInfos)
2529 {
2530 for (uint32_t i = 0; i < bindInfoCount; i++) {
2531 /* This section is removed by the optimizer for non-ANDROID builds */
2532 V3DV_FROM_HANDLE(v3dv_image, image, pBindInfos[i].image);
2533 if (vk_image_is_android_hardware_buffer(&image->vk)) {
2534 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2535 V3DV_FROM_HANDLE(v3dv_device_memory, mem, pBindInfos[i].memory);
2536
2537 VkImageDrmFormatModifierExplicitCreateInfoEXT eci;
2538 VkSubresourceLayout a_plane_layouts[V3DV_MAX_PLANE_COUNT];
2539 VkResult result = vk_android_get_ahb_layout(mem->vk.ahardware_buffer,
2540 &eci, a_plane_layouts,
2541 V3DV_MAX_PLANE_COUNT);
2542 if (result != VK_SUCCESS)
2543 return result;
2544
2545 result = v3dv_update_image_layout(device, image,
2546 eci.drmFormatModifier,
2547 /* disjoint = */ false, &eci);
2548 if (result != VK_SUCCESS)
2549 return result;
2550 }
2551
2552 const VkBindImageMemorySwapchainInfoKHR *swapchain_info =
2553          vk_find_struct_const(pBindInfos[i].pNext,
2554 BIND_IMAGE_MEMORY_SWAPCHAIN_INFO_KHR);
2555 if (swapchain_info && swapchain_info->swapchain) {
2556 #if !DETECT_OS_ANDROID
2557 struct v3dv_image *swapchain_image =
2558 v3dv_wsi_get_image_from_swapchain(swapchain_info->swapchain,
2559 swapchain_info->imageIndex);
2560          /* We assume swapchain images are single-plane */
2561 assert(swapchain_image->plane_count == 1);
2562 VkBindImageMemoryInfo swapchain_bind = {
2563 .sType = VK_STRUCTURE_TYPE_BIND_IMAGE_MEMORY_INFO,
2564 .image = pBindInfos[i].image,
2565 .memory = v3dv_device_memory_to_handle(swapchain_image->planes[0].mem),
2566 .memoryOffset = swapchain_image->planes[0].mem_offset,
2567 };
2568 bind_image_memory(&swapchain_bind);
2569 #endif
2570       } else {
2572 bind_image_memory(&pBindInfos[i]);
2573 }
2574 }
2575
2576 return VK_SUCCESS;
2577 }
2578
2579 void
2580 v3dv_buffer_init(struct v3dv_device *device,
2581 const VkBufferCreateInfo *pCreateInfo,
2582 struct v3dv_buffer *buffer,
2583 uint32_t alignment)
2584 {
2585 const VkBufferUsageFlags2CreateInfoKHR *flags2 =
2586 vk_find_struct_const(pCreateInfo->pNext,
2587 BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR);
2588 VkBufferUsageFlags2KHR usage;
2589 if (flags2)
2590 usage = flags2->usage;
2591 else
2592 usage = pCreateInfo->usage;
2593
2594 buffer->size = pCreateInfo->size;
2595 buffer->usage = usage;
2596 buffer->alignment = alignment;
2597 }
2598
2599 static void
2600 get_buffer_memory_requirements(struct v3dv_buffer *buffer,
2601 VkMemoryRequirements2 *pMemoryRequirements)
2602 {
2603 pMemoryRequirements->memoryRequirements = (VkMemoryRequirements) {
2604 .memoryTypeBits = 0x1,
2605 .alignment = buffer->alignment,
2606 .size = align64(buffer->size, buffer->alignment),
2607 };
2608
2609 /* UBO and SSBO may be read using ldunifa, which prefetches the next
2610 * 4 bytes after a read. If the buffer's size is exactly a multiple
2611 * of a page size and the shader reads the last 4 bytes with ldunifa
2612 * the prefetching would read out of bounds and cause an MMU error,
2613 * so we allocate extra space to avoid kernel error spamming.
2614 */
2615 bool can_ldunifa = buffer->usage &
2616 (VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
2617 VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT);
2618 if (can_ldunifa && (buffer->size % 4096 == 0))
2619 pMemoryRequirements->memoryRequirements.size += buffer->alignment;
2620
2621 vk_foreach_struct(ext, pMemoryRequirements->pNext) {
2622 switch (ext->sType) {
2623 case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
2624 VkMemoryDedicatedRequirements *req =
2625 (VkMemoryDedicatedRequirements *) ext;
2626 req->requiresDedicatedAllocation = false;
2627 req->prefersDedicatedAllocation = false;
2628 break;
2629 }
2630 default:
2631 vk_debug_ignored_stype(ext->sType);
2632 break;
2633 }
2634 }
2635 }
2636
2637 VKAPI_ATTR void VKAPI_CALL
2638 v3dv_GetBufferMemoryRequirements2(VkDevice device,
2639 const VkBufferMemoryRequirementsInfo2 *pInfo,
2640 VkMemoryRequirements2 *pMemoryRequirements)
2641 {
2642 V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
2643 get_buffer_memory_requirements(buffer, pMemoryRequirements);
2644 }
2645
2646 VKAPI_ATTR void VKAPI_CALL
2647 v3dv_GetDeviceBufferMemoryRequirements(
2648 VkDevice _device,
2649 const VkDeviceBufferMemoryRequirements *pInfo,
2650 VkMemoryRequirements2 *pMemoryRequirements)
2651 {
2652 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2653
2654 struct v3dv_buffer buffer = { 0 };
2655 v3dv_buffer_init(device, pInfo->pCreateInfo, &buffer, V3D_NON_COHERENT_ATOM_SIZE);
2656 get_buffer_memory_requirements(&buffer, pMemoryRequirements);
2657 }
2658
2659 void
2660 v3dv_buffer_bind_memory(const VkBindBufferMemoryInfo *info)
2661 {
2662 V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->buffer);
2663 V3DV_FROM_HANDLE(v3dv_device_memory, mem, info->memory);
2664
2665 /* Valid usage:
2666 *
2667 * "memoryOffset must be an integer multiple of the alignment member of
2668 * the VkMemoryRequirements structure returned from a call to
2669 * vkGetBufferMemoryRequirements with buffer"
2670 */
2671 assert(info->memoryOffset % buffer->alignment == 0);
2672 assert(info->memoryOffset < mem->bo->size);
2673
2674 buffer->mem = mem;
2675 buffer->mem_offset = info->memoryOffset;
2676 }
2677
2679 VKAPI_ATTR VkResult VKAPI_CALL
2680 v3dv_BindBufferMemory2(VkDevice device,
2681 uint32_t bindInfoCount,
2682 const VkBindBufferMemoryInfo *pBindInfos)
2683 {
2684 for (uint32_t i = 0; i < bindInfoCount; i++)
2685 v3dv_buffer_bind_memory(&pBindInfos[i]);
2686
2687 return VK_SUCCESS;
2688 }
2689
2690 VKAPI_ATTR VkResult VKAPI_CALL
2691 v3dv_CreateBuffer(VkDevice _device,
2692 const VkBufferCreateInfo *pCreateInfo,
2693 const VkAllocationCallbacks *pAllocator,
2694 VkBuffer *pBuffer)
2695 {
2696 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2697 struct v3dv_buffer *buffer;
2698
2699 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2700 assert(pCreateInfo->usage != 0);
2701
2702 /* We don't support any flags for now */
2703 assert(pCreateInfo->flags == 0);
2704
2705 buffer = vk_object_zalloc(&device->vk, pAllocator, sizeof(*buffer),
2706 VK_OBJECT_TYPE_BUFFER);
2707 if (buffer == NULL)
2708 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2709
2710 v3dv_buffer_init(device, pCreateInfo, buffer, V3D_NON_COHERENT_ATOM_SIZE);
2711
2712 /* Limit allocations to 32-bit */
2713 const VkDeviceSize aligned_size = align64(buffer->size, buffer->alignment);
2714 if (aligned_size > UINT32_MAX || aligned_size < buffer->size) {
2715 vk_free(&device->vk.alloc, buffer);
2716 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2717 }
2718
2719 *pBuffer = v3dv_buffer_to_handle(buffer);
2720
2721 return VK_SUCCESS;
2722 }
2723
2724 VKAPI_ATTR void VKAPI_CALL
2725 v3dv_DestroyBuffer(VkDevice _device,
2726 VkBuffer _buffer,
2727 const VkAllocationCallbacks *pAllocator)
2728 {
2729 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2730 V3DV_FROM_HANDLE(v3dv_buffer, buffer, _buffer);
2731
2732 if (!buffer)
2733 return;
2734
2735 vk_object_free(&device->vk, pAllocator, buffer);
2736 }
2737
2738 VKAPI_ATTR VkResult VKAPI_CALL
2739 v3dv_CreateFramebuffer(VkDevice _device,
2740 const VkFramebufferCreateInfo *pCreateInfo,
2741 const VkAllocationCallbacks *pAllocator,
2742 VkFramebuffer *pFramebuffer)
2743 {
2744 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2745 struct v3dv_framebuffer *framebuffer;
2746
2747 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2748
2749 size_t size = sizeof(*framebuffer) +
2750 sizeof(struct v3dv_image_view *) * pCreateInfo->attachmentCount;
2751 framebuffer = vk_object_zalloc(&device->vk, pAllocator, size,
2752 VK_OBJECT_TYPE_FRAMEBUFFER);
2753 if (framebuffer == NULL)
2754 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2755
2756 framebuffer->width = pCreateInfo->width;
2757 framebuffer->height = pCreateInfo->height;
2758 framebuffer->layers = pCreateInfo->layers;
2759 framebuffer->has_edge_padding = true;
2760
2761 const VkFramebufferAttachmentsCreateInfo *imageless =
2762 vk_find_struct_const(pCreateInfo->pNext,
2763 FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);
2764
2765 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2766 framebuffer->color_attachment_count = 0;
2767 for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
2768 if (!imageless) {
2769 framebuffer->attachments[i] =
2770 v3dv_image_view_from_handle(pCreateInfo->pAttachments[i]);
2771 if (framebuffer->attachments[i]->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT)
2772 framebuffer->color_attachment_count++;
2773 } else {
2774 assert(i < imageless->attachmentImageInfoCount);
2775 if (imageless->pAttachmentImageInfos[i].usage &
2776 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) {
2777 framebuffer->color_attachment_count++;
2778 }
2779 }
2780 }
2781
2782 *pFramebuffer = v3dv_framebuffer_to_handle(framebuffer);
2783
2784 return VK_SUCCESS;
2785 }
2786
2787 VKAPI_ATTR void VKAPI_CALL
2788 v3dv_DestroyFramebuffer(VkDevice _device,
2789 VkFramebuffer _fb,
2790 const VkAllocationCallbacks *pAllocator)
2791 {
2792 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2793 V3DV_FROM_HANDLE(v3dv_framebuffer, fb, _fb);
2794
2795 if (!fb)
2796 return;
2797
2798 vk_object_free(&device->vk, pAllocator, fb);
2799 }
2800
2801 void
2802 v3dv_setup_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer,
2803 const VkRenderingInfoKHR *info)
2804 {
2805 struct v3dv_device *device = cmd_buffer->device;
2806
2807    /* The maximum number of framebuffer attachments is (max color RTs + one
2808     * D/S attachment), multiplied by two to account for MSAA resolves.
2809     */
2810 const uint32_t max_attachments =
2811 2 * (V3D_MAX_RENDER_TARGETS(device->devinfo.ver) + 1);
2812 const uint32_t attachments_alloc_size =
2813 sizeof(struct v3dv_image_view *) * max_attachments;
2814
2815    /* The dynamic framebuffer is allocated only once and stays valid for
2816     * the duration of the command buffer.
2817     */
2818 struct v3dv_framebuffer *fb = cmd_buffer->state.dynamic_framebuffer;
2819 if (!fb) {
2820 uint32_t alloc_size = sizeof(struct v3dv_framebuffer) +
2821 attachments_alloc_size;
2822 fb = vk_object_zalloc(&cmd_buffer->device->vk, NULL, alloc_size,
2823 VK_OBJECT_TYPE_FRAMEBUFFER);
2824 if (fb == NULL) {
2825 v3dv_flag_oom(cmd_buffer, NULL);
2826 return;
2827 }
2828 cmd_buffer->state.dynamic_framebuffer = fb;
2829 } else {
2830 memset(fb->attachments, 0, attachments_alloc_size);
2831 }
2832
2833 fb->width = info->renderArea.offset.x + info->renderArea.extent.width;
2834 fb->height = info->renderArea.offset.y + info->renderArea.extent.height;
2835
2836 /* From the Vulkan spec for VkFramebufferCreateInfo:
2837 *
2838 * "If the render pass uses multiview, then layers must be one (...)"
2839 */
2840 fb->layers = info->viewMask == 0 ? info->layerCount : 1;
2841
2842 struct v3dv_render_pass *pass = &cmd_buffer->state.dynamic_pass;
2843 assert(pass->subpass_count == 1 && pass->subpasses);
2844 assert(pass->subpasses[0].color_count == info->colorAttachmentCount);
2845 fb->color_attachment_count = info->colorAttachmentCount;
2846
2847 uint32_t a = 0;
2848 for (int i = 0; i < info->colorAttachmentCount; i++) {
2849 if (info->pColorAttachments[i].imageView == VK_NULL_HANDLE)
2850 continue;
2851 fb->attachments[a++] =
2852 v3dv_image_view_from_handle(info->pColorAttachments[i].imageView);
2853 if (info->pColorAttachments[i].resolveMode != VK_RESOLVE_MODE_NONE) {
2854 fb->attachments[a++] =
2855 v3dv_image_view_from_handle(info->pColorAttachments[i].resolveImageView);
2856 }
2857 }
2858
2859 if ((info->pDepthAttachment && info->pDepthAttachment->imageView) ||
2860 (info->pStencilAttachment && info->pStencilAttachment->imageView)) {
2861 const struct VkRenderingAttachmentInfo *common_ds_info =
2862 (info->pDepthAttachment &&
2863 info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
2864 info->pDepthAttachment :
2865 info->pStencilAttachment;
2866
2867 fb->attachments[a++] =
2868 v3dv_image_view_from_handle(common_ds_info->imageView);
2869
2870 if (common_ds_info->resolveMode != VK_RESOLVE_MODE_NONE) {
2871 fb->attachments[a++] =
2872 v3dv_image_view_from_handle(common_ds_info->resolveImageView);
2873 }
2874 }
2875
2876 assert(a == pass->attachment_count);
2877 fb->attachment_count = a;
2878
2879    /* Dynamic rendering doesn't provide the size of the underlying
2880     * framebuffer, so we estimate it from the render area. This means the
2881     * underlying attachments may be larger, and thus we cannot assume we
2882     * have edge padding.
2883     */
2884 fb->has_edge_padding = false;
2885 }
2886
2887 void
2888 v3dv_destroy_dynamic_framebuffer(struct v3dv_cmd_buffer *cmd_buffer)
2889 {
2890 if (!cmd_buffer->state.dynamic_framebuffer)
2891 return;
2892
2893 VkDevice vk_device = v3dv_device_to_handle(cmd_buffer->device);
2894 VkFramebuffer vk_dynamic_fb =
2895 v3dv_framebuffer_to_handle(cmd_buffer->state.dynamic_framebuffer);
2896 v3dv_DestroyFramebuffer(vk_device, vk_dynamic_fb, NULL);
2897 cmd_buffer->state.dynamic_framebuffer = NULL;
2898 }
2899
2900 VKAPI_ATTR VkResult VKAPI_CALL
2901 v3dv_GetMemoryFdPropertiesKHR(VkDevice _device,
2902 VkExternalMemoryHandleTypeFlagBits handleType,
2903 int fd,
2904 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2905 {
2906 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2907 struct v3dv_physical_device *pdevice = device->pdevice;
2908
2909 switch (handleType) {
2910 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
2911 pMemoryFdProperties->memoryTypeBits =
2912 (1 << pdevice->memory.memoryTypeCount) - 1;
2913 return VK_SUCCESS;
2914 default:
2915 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2916 }
2917 }
2918
2919 VKAPI_ATTR VkResult VKAPI_CALL
2920 v3dv_GetMemoryFdKHR(VkDevice _device,
2921 const VkMemoryGetFdInfoKHR *pGetFdInfo,
2922 int *pFd)
2923 {
2924 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2925 V3DV_FROM_HANDLE(v3dv_device_memory, mem, pGetFdInfo->memory);
2926
2927 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2928 assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2929 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2930
2931 int fd, ret;
2932 ret = drmPrimeHandleToFD(device->pdevice->render_fd,
2933 mem->bo->handle,
2934 DRM_CLOEXEC, &fd);
2935 if (ret)
2936 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2937
2938 *pFd = fd;
2939
2940 return VK_SUCCESS;
2941 }
2942
2943 VKAPI_ATTR VkResult VKAPI_CALL
2944 v3dv_CreateSampler(VkDevice _device,
2945 const VkSamplerCreateInfo *pCreateInfo,
2946 const VkAllocationCallbacks *pAllocator,
2947 VkSampler *pSampler)
2948 {
2949 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2950 struct v3dv_sampler *sampler;
2951
2952 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
2953
2954 sampler = vk_object_zalloc(&device->vk, pAllocator, sizeof(*sampler),
2955 VK_OBJECT_TYPE_SAMPLER);
2956 if (!sampler)
2957 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2958
2959 sampler->plane_count = 1;
2960
2961 sampler->compare_enable = pCreateInfo->compareEnable;
2962 sampler->unnormalized_coordinates = pCreateInfo->unnormalizedCoordinates;
2963
2964 const VkSamplerCustomBorderColorCreateInfoEXT *bc_info =
2965 vk_find_struct_const(pCreateInfo->pNext,
2966 SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);
2967
2968 const VkSamplerYcbcrConversionInfo *ycbcr_conv_info =
2969 vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);
2970
2971 const struct vk_format_ycbcr_info *ycbcr_info = NULL;
2972
2973 if (ycbcr_conv_info) {
2974 VK_FROM_HANDLE(vk_ycbcr_conversion, conversion, ycbcr_conv_info->conversion);
2975 ycbcr_info = vk_format_get_ycbcr_info(conversion->state.format);
2976 if (ycbcr_info) {
2977 sampler->plane_count = ycbcr_info->n_planes;
2978 sampler->conversion = conversion;
2979 }
2980 }
2981
2982 v3d_X((&device->devinfo), pack_sampler_state)(device, sampler, pCreateInfo, bc_info);
2983
2984 *pSampler = v3dv_sampler_to_handle(sampler);
2985
2986 return VK_SUCCESS;
2987 }
2988
2989 VKAPI_ATTR void VKAPI_CALL
2990 v3dv_DestroySampler(VkDevice _device,
2991 VkSampler _sampler,
2992 const VkAllocationCallbacks *pAllocator)
2993 {
2994 V3DV_FROM_HANDLE(v3dv_device, device, _device);
2995 V3DV_FROM_HANDLE(v3dv_sampler, sampler, _sampler);
2996
2997 if (!sampler)
2998 return;
2999
3000 vk_object_free(&device->vk, pAllocator, sampler);
3001 }
3002
3003 VKAPI_ATTR void VKAPI_CALL
3004 v3dv_GetDeviceMemoryCommitment(VkDevice device,
3005 VkDeviceMemory memory,
3006 VkDeviceSize *pCommittedMemoryInBytes)
3007 {
3008 *pCommittedMemoryInBytes = 0;
3009 }
3010
3011 VKAPI_ATTR void VKAPI_CALL
3012 v3dv_GetImageSparseMemoryRequirements(
3013 VkDevice device,
3014 VkImage image,
3015 uint32_t *pSparseMemoryRequirementCount,
3016 VkSparseImageMemoryRequirements *pSparseMemoryRequirements)
3017 {
3018 *pSparseMemoryRequirementCount = 0;
3019 }
3020
3021 VKAPI_ATTR void VKAPI_CALL
3022 v3dv_GetImageSparseMemoryRequirements2(
3023 VkDevice device,
3024 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
3025 uint32_t *pSparseMemoryRequirementCount,
3026 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3027 {
3028 *pSparseMemoryRequirementCount = 0;
3029 }
3030
3031 VKAPI_ATTR void VKAPI_CALL
3032 v3dv_GetDeviceImageSparseMemoryRequirements(
3033 VkDevice device,
3034 const VkDeviceImageMemoryRequirements *pInfo,
3035 uint32_t *pSparseMemoryRequirementCount,
3036 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
3037 {
3038 *pSparseMemoryRequirementCount = 0;
3039 }
3040
3041 VkDeviceAddress
3042 v3dv_GetBufferDeviceAddress(VkDevice device,
3043 const VkBufferDeviceAddressInfo *pInfo)
3044 {
3045 V3DV_FROM_HANDLE(v3dv_buffer, buffer, pInfo->buffer);
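   /* V3D BOs are assigned a fixed GPU virtual address by the kernel when
    * they are created (see DRM_IOCTL_V3D_GET_BO_OFFSET above), so the device
    * address is simply the BO's GPU offset plus the buffer's offset within
    * the memory object.
    */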
3046 return buffer->mem_offset + buffer->mem->bo->offset;
3047 }
3048
3049 uint64_t
3050 v3dv_GetBufferOpaqueCaptureAddress(VkDevice device,
3051 const VkBufferDeviceAddressInfo *pInfo)
3052 {
3053 /* Not implemented */
3054 return 0;
3055 }
3056
3057 uint64_t
3058 v3dv_GetDeviceMemoryOpaqueCaptureAddress(
3059 VkDevice device,
3060 const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
3061 {
3062 /* Not implemented */
3063 return 0;
3064 }
3065
3066 VkResult
3067 v3dv_create_compute_pipeline_from_nir(struct v3dv_device *device,
3068 nir_shader *nir,
3069 VkPipelineLayout pipeline_layout,
3070 VkPipeline *pipeline)
3071 {
3072 struct vk_shader_module cs_m = vk_shader_module_from_nir(nir);
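   /* The shader module is a small stack-allocated wrapper around the NIR
    * shader; it only needs to stay alive for the pipeline creation call
    * below.
    */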
3073
3074 VkPipelineShaderStageCreateInfo set_event_cs_stage = {
3075 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
3076 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
3077 .module = vk_shader_module_to_handle(&cs_m),
3078 .pName = "main",
3079 };
3080
3081 VkComputePipelineCreateInfo info = {
3082 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
3083 .stage = set_event_cs_stage,
3084 .layout = pipeline_layout,
3085 };
3086
3087 VkResult result =
3088 v3dv_CreateComputePipelines(v3dv_device_to_handle(device), VK_NULL_HANDLE,
3089 1, &info, &device->vk.alloc, pipeline);
3090
3091 return result;
3092 }
3093