/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pco/pco.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "usc/pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices per instance are created,
 * subsequent devices get a smaller initial global free list size, as this
 * use case usually implies smaller amounts of work spread across them. The
 * free list can still grow as required.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
#   define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

#if PVR_USE_WSI_PLATFORM_DISPLAY
#   define PVR_USE_WSI_PLATFORM true
#else
#   define PVR_USE_WSI_PLATFORM false
#endif

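/* The driver advertises Vulkan 1.0; the patch component tracks the version
 * of the Vulkan headers the driver was built against.
 */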
#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default size in bytes used by pvr_CreateDevice() for setting up the
 * suballoc_general, suballoc_pds and suballoc_usc suballocators.
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

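/* A render/display DRM driver pair. The name fields hold device tree
 * compatible strings, and len is the string length excluding the terminating
 * NUL, so entries can be prefix-matched against a device's compatible list
 * with strncmp() (see pvr_drm_device_compatible()).
 */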
struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      const char *name;
      size_t len;
   } render, display;
};

#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
   DEF_CONFIG("ti,j721s2-gpu", "ti,j721e-dss"),
};

#undef DEF_CONFIG

static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: Enable this extension when the conformance tests get updated
       * to version 1.3.6.0. The current version does not include the
       * Imagination driver ID, which makes a dEQP test fail.
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

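/* The pipeline cache UUID is the truncated SHA-1 of the driver build-id plus
 * the packed BVNC, so caches are invalidated whenever either the driver
 * binary or the target GPU changes.
 */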
static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

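   /* Column order matches struct pvr_descriptor_limits: resources, samplers,
    * uniform buffers, storage buffers, sampled images, storage images, and
    * input attachments.
    */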
   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

static bool pvr_physical_device_get_properties(
   const struct pvr_physical_device *const pdevice,
   struct vk_properties *const properties)
{
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
   const struct pvr_device_runtime_info *const dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

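   /* max_multisample is a power of two, so (max_multisample << 1) - 1 sets
    * every VkSampleCountFlagBits bit up to and including max_multisample,
    * e.g. 4 -> (1 | 2 | 4) samples.
    */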
   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

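   /* Smaller UVS configurations (at most 8 banks with 160 PBA entries) are
    * limited to 64 vertex output components; larger ones allow 128.
    */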
   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier: each slot has a fixed number of invocations, and the whole
    * workgroup may need to span multiple slots. As each slot will WAIT at the
    * barrier until the last invocation completes, all have to be schedulable
    * at the same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    * .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = pdevice->ws->page_size,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->pco_ctx)
      ralloc_free(pdevice->pco_ctx);

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      result = vk_errorf(instance,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "WARNING: powervr is not a conformant Vulkan "
                         "implementation. Pass "
                         "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                         "what you're doing.");
      goto err_pvr_winsys_destroy;
   }

   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(pdevice, &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Setup available memory heaps and types */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   pdevice->pco_ctx = pco_ctx_create(&pdevice->dev_info, NULL);
   if (!pdevice->pco_ctx) {
      ralloc_free(pdevice->compiler);
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize PCO compiler context");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
   int ret = drmGetDevices2(0, devices, max_devices);
   if (ret < 0) {
      return vk_errorf(obj,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to enumerate drm devices (errno %d: %s)",
                       -ret,
                       strerror(-ret));
   }

   if (num_devices_out)
      *num_devices_out = ret;

   return VK_SUCCESS;
}

static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compatible = drm_dev->deviceinfo.platform->compatible;

   for (char **compat = compatible; *compat; compat++) {
      if (strncmp(*compat, info->name, info->len) == 0)
         return true;
   }

   return false;
}

static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
      if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
         return &pvr_drm_configs[i];
   }

   return NULL;
}

static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   drmVersionPtr version_display, version_render;
   struct pvr_device_dump_info info;

   version_display = drmGetVersion(pdevice->ws->display_fd);
   if (!version_display)
      return;

   version_render = drmGetVersion(pdevice->ws->render_fd);
   if (!version_render) {
      drmFreeVersion(version_display);
      return;
   }

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;
   info.drm_display.patchlevel = version_display->version_patchlevel;
   info.drm_display.major = version_display->version_major;
   info.drm_display.minor = version_display->version_minor;
   info.drm_display.name = version_display->name;
   info.drm_display.date = version_display->date;
   info.drm_display.comp = comp_display;
   info.drm_render.patchlevel = version_render->version_patchlevel;
   info.drm_render.major = version_render->version_major;
   info.drm_render.minor = version_render->version_minor;
   info.drm_render.name = version_render->name;
   info.drm_render.date = version_render->date;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(version_display);
   drmFreeVersion(version_render);
}

static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return result;
   }

   pvr_process_debug_variable();

   instance->active_device_count = 0;

   instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
   instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

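/* Number of simultaneous common-store allocations the hardware performs.
 * S8XE cores allocate once per raster pipe; on other cores the count scales
 * inversely with the cluster count of the (single) phantom.
 */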
static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t min_cluster_per_phantom;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);
   min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);

   if (min_cluster_per_phantom >= 4)
      return 1;
   else if (min_cluster_per_phantom == 2)
      return 2;
   else
      return 4;
}

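/* Three modes, selected by fs_common_size:
 *   0          - return the maximum number of tiles in flight;
 *   UINT32_MAX - return the largest per-allocation common store size usable
 *                while keeping min_tiles_in_flight tiles in flight;
 *   otherwise  - return the number of tiles in flight achievable with the
 *                given per-allocation common store size.
 */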
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);

      return max_common_size;
   }

   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if MESA_DEBUG
   /* Validate the above result. */

   assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

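/* A single queue family exposes graphics, compute and transfer on every
 * queue; timestampValidBits of zero advertises that timestamp queries are
 * not supported.
 */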
static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;

         pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         pMemoryBudget->heapUsage[0] = pdevice->heap_used;

         for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            pMemoryBudget->heapBudget[i] = 0u;
            pMemoryBudget->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   const struct vk_instance *vk_instance = NULL;

   if (_instance != NULL) {
      PVR_FROM_HANDLE(pvr_instance, instance, _instance);
      vk_instance = &instance->vk;
   }

   return vk_instance_get_proc_addr(vk_instance,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

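   /* The data-segment pass returns a pointer just past the data it wrote, so
    * the code segment is generated immediately after it in the same staging
    * buffer.
    */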
   data_buffer = staging_buffer;
   code_buffer = pvr_pds_compute_shader(program,
                                        data_buffer,
                                        PDS_GENERATE_DATA_SEGMENT,
                                        dev_info);

   pvr_pds_compute_shader(program,
                          code_buffer,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program->data_size,
                               ROGUE_CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT,
                               code_buffer,
                               program->code_size / sizeof(uint32_t),
                               ROGUE_CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT,
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   struct util_dynarray usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   util_dynarray_init(&usc_program, NULL);
   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program.data,
                               usc_program.size,
                               16,
                               &device->idfwdf_state.usc);
   util_dynarray_fini(&usc_program);

   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
      sampler.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
      sampler.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to setup the shareds data instead of
    * assuming there's always 12 and this is how they should be setup?
    */

   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_suballoc_free(device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_suballoc_free(device->idfwdf_state.usc);
}
1678
1679 /* FIXME: We should be calculating the size when we upload the code in
1680 * pvr_srv_setup_static_pixel_event_program().
1681 */
pvr_device_get_pixel_event_pds_program_data_size(const struct pvr_device_info * dev_info,uint32_t * const data_size_in_dwords_out)1682 static void pvr_device_get_pixel_event_pds_program_data_size(
1683 const struct pvr_device_info *dev_info,
1684 uint32_t *const data_size_in_dwords_out)
1685 {
1686 struct pvr_pds_event_program program = {
1687 /* No data to DMA, just a DOUTU needed. */
1688 .num_emit_word_pairs = 0,
1689 };
1690
1691 pvr_pds_set_sizes_pixel_event(&program, dev_info);
1692
1693 *data_size_in_dwords_out = program.data_size;
1694 }
1695
pvr_device_init_nop_program(struct pvr_device * device)1696 static VkResult pvr_device_init_nop_program(struct pvr_device *device)
1697 {
1698 const uint32_t cache_line_size =
1699 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1700 struct pvr_pds_kickusc_program program = { 0 };
1701 struct util_dynarray nop_usc_bin;
1702 uint32_t staging_buffer_size;
1703 uint32_t *staging_buffer;
1704 VkResult result;
1705
1706 pvr_uscgen_nop(&nop_usc_bin);
1707
1708 result = pvr_gpu_upload_usc(device,
1709 util_dynarray_begin(&nop_usc_bin),
1710 nop_usc_bin.size,
1711 cache_line_size,
1712 &device->nop_program.usc);
1713 util_dynarray_fini(&nop_usc_bin);
1714 if (result != VK_SUCCESS)
1715 return result;
1716
1717 /* Setup a PDS program that kicks the static USC program. */
1718 pvr_pds_setup_doutu(&program.usc_task_control,
1719 device->nop_program.usc->dev_addr.addr,
1720 0U,
1721 ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
1722 false);
1723
1724 pvr_pds_set_sizes_pixel_shader(&program);
1725
1726 staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
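
   /* The staging buffer holds the PDS data segment immediately followed by
    * the code segment; pvr_gpu_upload_pds() below is passed pointers to the
    * two halves separately.
    */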

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_suballoc_free(device->nop_program.usc);

   return result;
}

static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
{
   simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);

   for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
      device->tile_buffer_state.buffers[i] = NULL;

   device->tile_buffer_state.buffer_count = 0;
}

static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
{
   /* Destroy the mutex first: if it is still locked this triggers asserts,
    * catching attempts to free buffers that may still be in use, or to free
    * buffers while new ones are being allocated, before we can put things in
    * an inconsistent state.
    */
   simple_mtx_destroy(&device->tile_buffer_state.mtx);

   for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
      pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
}

/**
 * \brief Ensures that a certain number of tile buffers are allocated.
 *
 * Makes sure that \p capacity tile buffers are allocated. If fewer are
 * present, new tile buffers of \p size_in_bytes each are appended to reach
 * the quota.
 *
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
                                           uint32_t capacity,
                                           uint32_t size_in_bytes)
{
   struct pvr_device_tile_buffer_state *tile_buffer_state =
      &device->tile_buffer_state;
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   VkResult result;

   simple_mtx_lock(&tile_buffer_state->mtx);

   /* Assert in debug builds; clamp to the array size in release builds. */
   assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
   capacity = CLAMP(capacity,
                    tile_buffer_state->buffer_count,
                    ARRAY_SIZE(tile_buffer_state->buffers));

   /* TODO: Implement bo multialloc? To reduce the number of syscalls and
    * allocations.
    */
   for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
      result = pvr_bo_alloc(device,
                            device->heaps.general_heap,
                            size_in_bytes,
                            cache_line_size,
                            0,
                            &tile_buffer_state->buffers[i]);
      if (result != VK_SUCCESS) {
         for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
            pvr_bo_free(device, tile_buffer_state->buffers[j]);

         goto err_release_lock;
      }
   }

   tile_buffer_state->buffer_count = capacity;

   simple_mtx_unlock(&tile_buffer_state->mtx);

   return VK_SUCCESS;

err_release_lock:
   simple_mtx_unlock(&tile_buffer_state->mtx);

   return result;
}
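
/* Illustrative use (hypothetical values, not a call site in this file):
 * ensuring four 16 KiB tile buffers before kicking a job would look like
 *
 *    result = pvr_device_tile_buffer_ensure_cap(device, 4, 16 * 1024);
 *
 * Buffers persist for the lifetime of the device and are freed in
 * pvr_device_finish_tile_buffer_state().
 */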

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      sampler.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
      sampler.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
      sampler.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
      sampler.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
      sampler.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;
      sampler.non_normalized_coords = true;
   }
}

VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   struct pvr_winsys *ws;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   result = pvr_winsys_create(pdevice->render_path,
                              pdevice->display_path,
                              pAllocator ? pAllocator : &instance->vk.alloc,
                              &ws);
   if (result != VK_SUCCESS)
      goto err_out;

   device = vk_alloc2(&instance->vk.alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_pvr_winsys_destroy;
   }

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->instance = instance;
   device->pdevice = pdevice;
   device->ws = ws;

   vk_device_set_drm_fd(&device->vk, ws->render_fd);

   if (ws->features.supports_threaded_submit) {
      /* Queue submission can block if the kernel CCBs become full, so enable
       * threaded submit so that the submitter isn't blocked.
       */
      vk_device_enable_threaded_submit(&device->vk);
   }

   ws->ops->get_heaps_info(ws, &device->heaps);

   result = pvr_bo_store_create(device);
   if (result != VK_SUCCESS)
      goto err_vk_device_finish;

   pvr_bo_suballocator_init(&device->suballoc_general,
                            device->heaps.general_heap,
                            device,
                            PVR_SUBALLOCATOR_GENERAL_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_pds,
                            device->heaps.pds_heap,
                            device,
                            PVR_SUBALLOCATOR_PDS_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_transfer,
                            device->heaps.transfer_frag_heap,
                            device,
                            PVR_SUBALLOCATOR_TRANSFER_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_usc,
                            device->heaps.usc_heap,
                            device,
                            PVR_SUBALLOCATOR_USC_SIZE);
   pvr_bo_suballocator_init(&device->suballoc_vis_test,
                            device->heaps.vis_test_heap,
                            device,
                            PVR_SUBALLOCATOR_VIS_TEST_SIZE);

   if (p_atomic_inc_return(&instance->active_device_count) >
       PVR_SECONDARY_DEVICE_THRESHOLD) {
      initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
   }

   result = pvr_free_list_create(device,
                                 initial_free_list_size,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_dec_device_count;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_empty_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_device_create_compute_query_programs(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_empty;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_destroy_compute_query_programs;

   result = pvr_device_init_graphics_static_clear_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   result = pvr_device_init_spm_load_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_graphics_static_clear_state;

   pvr_device_init_tile_buffer_state(device);

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_tile_buffer_state;

   pvr_device_init_default_sampler_state(device);

   pvr_spm_init_scratch_buffer_store(device);

   result = pvr_init_robustness_buffer(device);
   if (result != VK_SUCCESS)
      goto err_pvr_spm_finish_scratch_buffer_store;

   result = pvr_border_color_table_init(&device->border_color_table, device);
   if (result != VK_SUCCESS)
      goto err_pvr_robustness_buffer_finish;

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_cmd_buffer_submit_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

err_pvr_robustness_buffer_finish:
   pvr_robustness_buffer_finish(device);

err_pvr_spm_finish_scratch_buffer_store:
   pvr_spm_finish_scratch_buffer_store(device);

   pvr_queues_destroy(device);

err_pvr_finish_tile_buffer_state:
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);

err_pvr_finish_graphics_static_clear_state:
   pvr_device_finish_graphics_static_clear_state(device);

err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_destroy_compute_query_programs:
   pvr_device_destroy_compute_query_programs(device);

err_pvr_free_compute_empty:
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);

err_pvr_free_compute_fence:
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_dec_device_count:
   p_atomic_dec(&device->instance->active_device_count);

   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);

   pvr_bo_store_destroy(device);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_out:
   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!device)
      return;

   pvr_border_color_table_finish(&device->border_color_table, device);
   pvr_robustness_buffer_finish(device);
   pvr_spm_finish_scratch_buffer_store(device);
   pvr_queues_destroy(device);
   pvr_device_finish_tile_buffer_state(device);
   pvr_device_finish_spm_load_state(device);
   pvr_device_finish_graphics_static_clear_state(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_device_destroy_compute_query_programs(device);
   pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
   pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
   pvr_bo_suballoc_free(device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_bo_suballocator_fini(&device->suballoc_vis_test);
   pvr_bo_suballocator_fini(&device->suballoc_usc);
   pvr_bo_suballocator_fini(&device->suballoc_transfer);
   pvr_bo_suballocator_fini(&device->suballoc_pds);
   pvr_bo_suballocator_fini(&device->suballoc_general);
   pvr_bo_store_destroy(device);
   pvr_winsys_destroy(device->ws);
   p_atomic_dec(&device->instance->active_device_count);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

static void free_memory(struct pvr_device *device,
                        struct pvr_device_memory *mem,
                        const VkAllocationCallbacks *pAllocator)
{
   if (!mem)
      return;

   /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
    *   If a memory object is mapped at the time it is freed, it is implicitly
    *   unmapped.
    */
   if (mem->bo->map)
      device->ws->ops->buffer_unmap(mem->bo);

   p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   uint64_t heap_used;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         if (device->ws->display_fd >= 0)
            type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
         break;
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
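
      /* Note: the size the winsys reports for an imported buffer is a
       * multiple of the page size, so align the requested size the same way
       * before the comparison below.
       */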

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try and texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            pAllocateInfo->allocationSize,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations on
       *    the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
   if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
      free_memory(device, mem, pAllocator);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types having
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
       * dma-buf should be imported using cacheable memory types,
       * given exporter's mmap will always map it as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   free_memory(device, mem, pAllocator);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   VkResult result;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */

   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped */
   if (mem->bo->map) {
      *ppData = (uint8_t *)mem->bo->map + offset;
      return VK_SUCCESS;
   }

   /* Map it all at once */
   result = device->ws->ops->buffer_map(mem->bo);
   if (result != VK_SUCCESS)
      return result;

   *ppData = (uint8_t *)mem->bo->map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
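   /* The virtual reservation must cover every page the binding touches, so
    * extend the size by the offset's remainder within the first page; the
    * mapping itself still starts at the given offset.
    */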
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;
   VkResult result;

   /* Valid usage:
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *   the VkMemoryRequirements structure returned from a call to
    *   vkGetBufferMemoryRequirements with buffer"
    *
    *   "memoryOffset must be an integer multiple of the alignment member of
    *   the VkMemoryRequirements structure returned from a call to
    *   vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   result = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                        virt_size,
                                        alignment,
                                        &vma);
   if (result != VK_SUCCESS)
      goto err_out;

   result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
   if (result != VK_SUCCESS)
      goto err_free_vma;

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;

err_free_vma:
   device->ws->ops->heap_free(vma);

err_out:
   return result;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   struct pvr_event *event = vk_object_alloc(&device->vk,
                                             pAllocator,
                                             sizeof(*event),
                                             VK_OBJECT_TYPE_EVENT);
   if (!event)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   event->sync = NULL;
   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   *pEvent = pvr_event_to_handle(event);

   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (!event)
      return;

   if (event->sync)
      vk_sync_destroy(&device->vk, event->sync);

   vk_object_free(&device->vk, pAllocator, event);
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_event, event, _event);
   VkResult result;

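   /* For device-side transitions, poll the event's sync object with a zero
    * timeout: a VK_SUCCESS wait means the GPU has already executed the
    * set/reset operation that last touched the event.
    */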
   switch (event->state) {
   case PVR_EVENT_STATE_SET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
      break;

   case PVR_EVENT_STATE_RESET_BY_DEVICE:
      if (!event->sync)
         return VK_EVENT_RESET;

      result =
         vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
      result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_SET_BY_HOST:
      result = VK_EVENT_SET;
      break;

   case PVR_EVENT_STATE_RESET_BY_HOST:
      result = VK_EVENT_RESET;
      break;

   default:
      unreachable("Event object in unknown state");
   }

   return result;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_SET_BY_HOST;

   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   PVR_FROM_HANDLE(pvr_event, event, _event);

   if (event->sync) {
      PVR_FROM_HANDLE(pvr_device, device, _device);

      const VkResult result = vk_sync_reset(&device->vk, event->sync);
      if (result != VK_SUCCESS)
         return result;
   }

   event->state = PVR_EVENT_STATE_RESET_BY_HOST;

   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) to prevent overflow issues. */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   struct pvr_suballocator *allocator;
   VkResult result;
   void *map;

   assert(size > 0);

   if (heap == device->heaps.general_heap)
      allocator = &device->suballoc_general;
   else if (heap == device->heaps.pds_heap)
      allocator = &device->suballoc_pds;
   else if (heap == device->heaps.transfer_frag_heap)
      allocator = &device->suballoc_transfer;
   else if (heap == device->heaps.usc_heap)
      allocator = &device->suballoc_usc;
   else
      unreachable("Unknown heap type");

   result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, data, size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_suballoc_bo **const pvr_bo_out)
{
   struct pvr_suballoc_bo *suballoc_bo = NULL;
   VkResult result;
   void *map;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over-allocate by one
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_suballoc(&device->suballoc_usc,
                            code_size + ROGUE_MAX_INSTR_BYTES,
                            code_alignment,
                            false,
                            &suballoc_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
   memcpy(map, code, code_size);

   *pvr_bo_out = suballoc_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device Logical device pointer.
 * \param[in] data Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords Size of PDS data segment in dwords.
 * \param[in] data_alignment Required alignment of the PDS data segment in
 *                           bytes. Must be a power of two.
 * \param[in] code Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords Size of PDS code segment in dwords.
 * \param[in] code_alignment Required alignment of the PDS code segment in
 *                           bytes. Must be a power of two.
 * \param[in] min_alignment Minimum alignment of the bo holding the PDS
 *                          program in bytes.
 * \param[out] pds_upload_out On success will be initialized based on the
 *                            uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
   const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
   const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   VkResult result;
   void *map;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));
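
   /* Both segments share a single suballocation:
    *
    *    +------------------+-----------+------------------+
    *    | data (data_size) | align pad | code (code_size) |
    *    +------------------+-----------+------------------+
    *    0                  ^ code_offset (aligned to code_alignment)
    */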

   result = pvr_bo_suballoc(&device->suballoc_pds,
                            bo_size,
                            bo_alignment,
                            true,
                            &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);

   if (data) {
      memcpy(map, data, data_size);

      pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)map + code_offset, code, code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            ROGUE_TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            ROGUE_TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
   struct pvr_spm_eot_state *spm_eot_state_per_render;
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   uint64_t scratch_buffer_size;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);
   vk_multialloc_add(&ma,
                     &spm_eot_state_per_render,
                     __typeof__(*spm_eot_state_per_render),
                     pass->hw_setup->render_count);
   vk_multialloc_add(&ma,
                     &spm_bgobj_state_per_render,
                     __typeof__(*spm_bgobj_state_per_render),
                     pass->hw_setup->render_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   scratch_buffer_size =
      pvr_spm_scratch_buffer_calc_required_size(pass,
                                                framebuffer->width,
                                                framebuffer->height);

   result = pvr_spm_scratch_buffer_get_buffer(device,
                                              scratch_buffer_size,
                                              &framebuffer->scratch_buffer);
   if (result != VK_SUCCESS)
      goto err_finish_render_targets;

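   /* Each iteration initializes the EOT state and then the bgobj state for
    * one render. On failure, the labels inside the loop unwind the current
    * iteration's partial state plus that of all previous iterations; the
    * "continue" below skips the unwind code on success.
    */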
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      uint32_t emit_count;

      result = pvr_spm_init_eot_state(device,
                                      &spm_eot_state_per_render[i],
                                      framebuffer,
                                      &pass->hw_setup->renders[i],
                                      &emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_eot_state;

      result = pvr_spm_init_bgobj_state(device,
                                        &spm_bgobj_state_per_render[i],
                                        framebuffer,
                                        &pass->hw_setup->renders[i],
                                        emit_count);
      if (result != VK_SUCCESS)
         goto err_finish_bgobj_state;

      continue;

err_finish_bgobj_state:
      pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);

      for (uint32_t j = 0; j < i; j++)
         pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);

err_finish_eot_state:
      for (uint32_t j = 0; j < i; j++)
         pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);

      goto err_finish_render_targets;
   }

   framebuffer->render_count = pass->hw_setup->render_count;
   framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
   framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_finish_render_targets:
   pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);

err_free_ppp_state_bo:
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   if (!framebuffer)
      return;

   for (uint32_t i = 0; i < framebuffer->render_count; i++) {
      pvr_spm_finish_bgobj_state(device,
                                 &framebuffer->spm_bgobj_state_per_render[i]);

      pvr_spm_finish_eot_state(device,
                               &framebuffer->spm_eot_state_per_render[i]);
   }

   pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return ROGUE_TEXSTATE_FILTER_POINT;
   case VK_FILTER_LINEAR:
      return ROGUE_TEXSTATE_FILTER_LINEAR;
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return ROGUE_TEXSTATE_ADDRMODE_REPEAT;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return ROGUE_TEXSTATE_ADDRMODE_FLIP;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return ROGUE_TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_BORDER;
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t border_color_table_index;
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   VkResult result;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler =
      vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
   if (!sampler) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_out;
   }

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   result =
      pvr_border_color_table_get_or_create_entry(&device->border_color_table,
                                                 sampler,
                                                 &border_color_table_index);
   if (result != VK_SUCCESS)
      goto err_free_sampler;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here: the GPU should decide
       * which of the two filters to use based on the clamped LOD value (LOD
       * <= 0 implies magnification, while LOD > 0 implies minification).
       *
       * As a workaround, we override magFilter with minFilter if we know that
       * the magnification filter will never be used due to clamping anyway
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)ROGUE_TEXSTATE_CLAMP_MAX /
                                  (1 << ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);
      const float max_dadjust = ((float)(ROGUE_TEXSTATE_DADJUST_MAX_UINT -
                                         ROGUE_TEXSTATE_DADJUST_ZERO_UINT)) /
                                (1 << ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);
      const float min_dadjust = ((float)(ROGUE_TEXSTATE_DADJUST_MIN_UINT -
                                         ROGUE_TEXSTATE_DADJUST_ZERO_UINT)) /
                                (1 << ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == ROGUE_TEXSTATE_ADDRMODE_FLIP)
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == ROGUE_TEXSTATE_ADDRMODE_FLIP)
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         ROGUE_TEXSTATE_DADJUST_ZERO_UINT +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);

      /* Anisotropy is not supported for now. */
      word.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }

      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);

      word.bordercolor_index = border_color_table_index;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;

err_free_sampler:
   vk_object_free(&device->vk, pAllocator, sampler);

err_out:
   return result;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint64_t size;

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;

   size = buffer->vk.size;

   if (size % device->ws->page_size == 0 ||
       size % device->ws->page_size >
          device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
      /* TODO: We can save memory by having one extra virtual page mapped
       * in and having the first and last virtual page mapped to the first
       * physical address.
       */
      size += PVR_BUFFER_MEMORY_PADDING_SIZE;
   }
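
   /* The condition above holds when the buffer would end exactly on, or
    * within PVR_BUFFER_MEMORY_PADDING_SIZE of, a page boundary; padding in
    * that case presumably keeps accesses that run slightly past the buffer's
    * end inside mapped memory (see also the TODO above).
    */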

   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
    * in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      align64(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}