1 /*
2 * Copyright © 2022 Imagination Technologies Ltd.
3 *
4 * based in part on anv driver which is:
5 * Copyright © 2015 Intel Corporation
6 *
7 * based in part on v3dv driver which is:
8 * Copyright © 2019 Raspberry Pi
9 *
10 * Permission is hereby granted, free of charge, to any person obtaining a copy
11 * of this software and associated documentation files (the "Software"), to deal
12 * in the Software without restriction, including without limitation the rights
13 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 * copies of the Software, and to permit persons to whom the Software is
15 * furnished to do so, subject to the following conditions:
16 *
17 * The above copyright notice and this permission notice (including the next
18 * paragraph) shall be included in all copies or substantial portions of the
19 * Software.
20 *
21 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
22 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
23 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
24 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
25 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
26 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
27 * SOFTWARE.
28 */
29
30 #include <assert.h>
31 #include <fcntl.h>
32 #include <inttypes.h>
33 #include <stdbool.h>
34 #include <stddef.h>
35 #include <stdint.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <vulkan/vulkan.h>
40 #include <xf86drm.h>
41
42 #include "git_sha1.h"
43 #include "hwdef/rogue_hw_utils.h"
44 #include "pvr_bo.h"
45 #include "pvr_border.h"
46 #include "pvr_clear.h"
47 #include "pvr_csb.h"
48 #include "pvr_csb_enum_helpers.h"
49 #include "pvr_debug.h"
50 #include "pvr_device_info.h"
51 #include "pvr_dump_info.h"
52 #include "pvr_hardcode.h"
53 #include "pvr_job_render.h"
54 #include "pvr_limits.h"
55 #include "pvr_pds.h"
56 #include "pvr_private.h"
57 #include "pvr_robustness.h"
58 #include "pvr_tex_state.h"
59 #include "pvr_types.h"
60 #include "pvr_uscgen.h"
61 #include "pvr_util.h"
62 #include "pvr_winsys.h"
63 #include "rogue/rogue.h"
64 #include "util/build_id.h"
65 #include "util/log.h"
66 #include "util/macros.h"
67 #include "util/mesa-sha1.h"
68 #include "util/os_misc.h"
69 #include "util/u_dynarray.h"
70 #include "util/u_math.h"
71 #include "vk_alloc.h"
72 #include "vk_extensions.h"
73 #include "vk_log.h"
74 #include "vk_object.h"
75 #include "vk_physical_device_features.h"
76 #include "vk_physical_device_properties.h"
77 #include "vk_sampler.h"
78 #include "vk_util.h"
79
80 #define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
81 #define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
82 #define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)
83
84 /* After PVR_SECONDARY_DEVICE_THRESHOLD devices have been created for an
85  * instance, subsequent devices get a smaller initial global free list size,
86  * since this use case usually implies smaller amounts of work spread across
87  * devices. The free list can still grow as required.
88 */
89 #define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
90 #define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)
91
92 /* The grow threshold is a percentage. This is intended to be 12.5%, but has
93 * been rounded up since the percentage is treated as an integer.
94 */
95 #define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
96
97 #if defined(VK_USE_PLATFORM_DISPLAY_KHR)
98 # define PVR_USE_WSI_PLATFORM_DISPLAY true
99 #else
100 # define PVR_USE_WSI_PLATFORM_DISPLAY false
101 #endif
102
103 #if PVR_USE_WSI_PLATFORM_DISPLAY
104 # define PVR_USE_WSI_PLATFORM true
105 #else
106 # define PVR_USE_WSI_PLATFORM false
107 #endif
108
109 #define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
110
111 /* Amount of padding required for VkBuffers to ensure we don't read beyond
112 * a page boundary.
113 */
114 #define PVR_BUFFER_MEMORY_PADDING_SIZE 4
115
116 /* Default size in bytes used by pvr_CreateDevice() for setting up the
117  * general, PDS, transfer, USC and visibility-test suballocators.
118 *
119 * TODO: Investigate if a different default size can improve the overall
120 * performance of internal driver allocations.
121 */
122 #define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
123 #define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
124 #define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
125 #define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
126 #define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)
127
128 struct pvr_drm_device_config {
129 struct pvr_drm_device_info {
130 const char *name;
131 size_t len;
132 } render, display;
133 };
134
135 #define DEF_CONFIG(render_, display_) \
136 { \
137 .render = { .name = render_, .len = sizeof(render_) - 1 }, \
138 .display = { .name = display_, .len = sizeof(display_) - 1 }, \
139 }
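/* For example, DEF_CONFIG("ti,am62-gpu", "ti,am625-dss") expands to:
 *    { .render  = { .name = "ti,am62-gpu",  .len = 11 },
 *      .display = { .name = "ti,am625-dss", .len = 12 }, }
 * The lengths exclude the NUL terminator so the names can be prefix matched
 * against device tree compatible strings with strncmp().
 */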
140
141 /* This is the list of supported DRM render/display driver configs. */
142 static const struct pvr_drm_device_config pvr_drm_configs[] = {
143 DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
144 DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
145 };
146
147 #undef DEF_CONFIG
148
149 static const struct vk_instance_extension_table pvr_instance_extensions = {
150 .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
151 .KHR_external_fence_capabilities = true,
152 .KHR_external_memory_capabilities = true,
153 .KHR_external_semaphore_capabilities = true,
154 .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
155 .KHR_get_physical_device_properties2 = true,
156 .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
157 .KHR_surface = PVR_USE_WSI_PLATFORM,
158 #ifndef VK_USE_PLATFORM_WIN32_KHR
159 .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
160 #endif
161 .EXT_debug_report = true,
162 .EXT_debug_utils = true,
163 };
164
165 static void pvr_physical_device_get_supported_extensions(
166 struct vk_device_extension_table *extensions)
167 {
168 *extensions = (struct vk_device_extension_table){
169 .KHR_bind_memory2 = true,
170 .KHR_copy_commands2 = true,
171 /* TODO: enable this extension when the conformance tests get
172 * updated to version 1.3.6.0, the current version does not
173 * include the imagination driver ID, which will make a dEQP
174 * test fail
175 */
176 .KHR_driver_properties = false,
177 .KHR_external_fence = true,
178 .KHR_external_fence_fd = true,
179 .KHR_external_memory = true,
180 .KHR_external_memory_fd = true,
181 .KHR_format_feature_flags2 = true,
182 .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
183 .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
184 .KHR_get_memory_requirements2 = true,
185 .KHR_image_format_list = true,
186 .KHR_shader_expect_assume = true,
187 .KHR_swapchain = PVR_USE_WSI_PLATFORM,
188 .KHR_timeline_semaphore = true,
189 .KHR_uniform_buffer_standard_layout = true,
190 .EXT_external_memory_dma_buf = true,
191 .EXT_host_query_reset = true,
192 .EXT_memory_budget = true,
193 .EXT_private_data = true,
194 .EXT_scalar_block_layout = true,
195 .EXT_texel_buffer_alignment = true,
196 .EXT_tooling_info = true,
197 };
198 }
199
200 static void pvr_physical_device_get_supported_features(
201 const struct pvr_device_info *const dev_info,
202 struct vk_features *const features)
203 {
204 *features = (struct vk_features){
205 /* Vulkan 1.0 */
206 .robustBufferAccess = true,
207 .fullDrawIndexUint32 = true,
208 .imageCubeArray = true,
209 .independentBlend = false,
210 .geometryShader = false,
211 .tessellationShader = false,
212 .sampleRateShading = true,
213 .dualSrcBlend = false,
214 .logicOp = false,
215 .multiDrawIndirect = true,
216 .drawIndirectFirstInstance = true,
217 .depthClamp = true,
218 .depthBiasClamp = true,
219 .fillModeNonSolid = false,
220 .depthBounds = false,
221 .wideLines = true,
222 .largePoints = true,
223 .alphaToOne = false,
224 .multiViewport = false,
225 .samplerAnisotropy = false,
226 .textureCompressionETC2 = true,
227 .textureCompressionASTC_LDR = false,
228 .textureCompressionBC = false,
229 .occlusionQueryPrecise = false,
230 .pipelineStatisticsQuery = false,
231 .vertexPipelineStoresAndAtomics = true,
232 .fragmentStoresAndAtomics = true,
233 .shaderTessellationAndGeometryPointSize = false,
234 .shaderImageGatherExtended = false,
235 .shaderStorageImageExtendedFormats = true,
236 .shaderStorageImageMultisample = false,
237 .shaderStorageImageReadWithoutFormat = true,
238 .shaderStorageImageWriteWithoutFormat = false,
239 .shaderUniformBufferArrayDynamicIndexing = true,
240 .shaderSampledImageArrayDynamicIndexing = true,
241 .shaderStorageBufferArrayDynamicIndexing = true,
242 .shaderStorageImageArrayDynamicIndexing = true,
243 .shaderClipDistance = false,
244 .shaderCullDistance = false,
245 .shaderFloat64 = false,
246 .shaderInt64 = true,
247 .shaderInt16 = true,
248 .shaderResourceResidency = false,
249 .shaderResourceMinLod = false,
250 .sparseBinding = false,
251 .sparseResidencyBuffer = false,
252 .sparseResidencyImage2D = false,
253 .sparseResidencyImage3D = false,
254 .sparseResidency2Samples = false,
255 .sparseResidency4Samples = false,
256 .sparseResidency8Samples = false,
257 .sparseResidency16Samples = false,
258 .sparseResidencyAliased = false,
259 .variableMultisampleRate = false,
260 .inheritedQueries = false,
261
262 /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
263 .timelineSemaphore = true,
264
265 /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
266 .uniformBufferStandardLayout = true,
267
268 /* Vulkan 1.2 / VK_EXT_host_query_reset */
269 .hostQueryReset = true,
270
271 /* Vulkan 1.3 / VK_EXT_private_data */
272 .privateData = true,
273
274 /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
275 .scalarBlockLayout = true,
276
277 /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
278 .texelBufferAlignment = true,
279
280 /* VK_KHR_shader_expect_assume */
281 .shaderExpectAssume = true,
282 };
283 }
284
285 static bool pvr_physical_device_init_pipeline_cache_uuid(
286 const struct pvr_device_info *const dev_info,
287 uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
288 {
289 struct mesa_sha1 sha1_ctx;
290 unsigned build_id_len;
291 uint8_t sha1[20];
292 uint64_t bvnc;
293
294 const struct build_id_note *note =
295 build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
296 if (!note) {
297 mesa_loge("Failed to find build-id");
298 return false;
299 }
300
301 build_id_len = build_id_length(note);
302 if (build_id_len < 20) {
303 mesa_loge("Build-id too short. It needs to be a SHA");
304 return false;
305 }
306
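   /* Include the device's packed BVNC (the GPU core identifier) in the hash
    * so that pipeline caches are keyed to the exact core as well as to the
    * driver build.
    */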
307 bvnc = pvr_get_packed_bvnc(dev_info);
308
309 _mesa_sha1_init(&sha1_ctx);
310 _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
311 _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
312 _mesa_sha1_final(&sha1_ctx, sha1);
313 memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);
314
315 return true;
316 }
317
318 struct pvr_descriptor_limits {
319 uint32_t max_per_stage_resources;
320 uint32_t max_per_stage_samplers;
321 uint32_t max_per_stage_uniform_buffers;
322 uint32_t max_per_stage_storage_buffers;
323 uint32_t max_per_stage_sampled_images;
324 uint32_t max_per_stage_storage_images;
325 uint32_t max_per_stage_input_attachments;
326 };
327
328 static const struct pvr_descriptor_limits *
329 pvr_get_physical_device_descriptor_limits(
330 const struct pvr_device_info *dev_info,
331 const struct pvr_device_runtime_info *dev_runtime_info)
332 {
333 enum pvr_descriptor_cs_level {
334 /* clang-format off */
335 CS4096, /* 6XT and some XE cores with large CS. */
336 CS2560, /* Mid range Rogue XE cores. */
337 CS2048, /* Low end Rogue XE cores. */
338 CS1536, /* Ultra-low-end 9XEP. */
339 CS680, /* lower limits for older devices. */
340 CS408, /* 7XE. */
341 /* clang-format on */
342 };
343
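   /* Each row below is ordered to match struct pvr_descriptor_limits:
    * resources, samplers, uniform buffers, storage buffers, sampled images,
    * storage images, input attachments.
    */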
344 static const struct pvr_descriptor_limits descriptor_limits[] = {
345 [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
346 [CS2560] = { 648U, 128U, 128U, 128U, 128U, 128U, 8U, },
347 [CS2048] = { 584U, 128U, 96U, 64U, 128U, 128U, 8U, },
348 [CS1536] = { 456U, 64U, 96U, 64U, 128U, 64U, 8U, },
349 [CS680] = { 224U, 32U, 64U, 36U, 48U, 8U, 8U, },
350 [CS408] = { 128U, 16U, 40U, 28U, 16U, 8U, 8U, },
351 };
352
353 const uint32_t common_size =
354 pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
355 dev_runtime_info,
356 UINT32_MAX,
357 1);
358 enum pvr_descriptor_cs_level cs_level;
359
360 if (common_size >= 2048) {
361 cs_level = CS2048;
362 } else if (common_size >= 1526) {
363 cs_level = CS1536;
364 } else if (common_size >= 680) {
365 cs_level = CS680;
366 } else if (common_size >= 408) {
367 cs_level = CS408;
368 } else {
369 mesa_loge("This core appears to have a very limited amount of shared "
370 "register space and may not meet the Vulkan spec limits.");
371 abort();
372 }
373
374 return &descriptor_limits[cs_level];
375 }
376
377 static bool pvr_physical_device_get_properties(
378 const struct pvr_device_info *const dev_info,
379 const struct pvr_device_runtime_info *const dev_runtime_info,
380 struct vk_properties *const properties)
381 {
382 const struct pvr_descriptor_limits *descriptor_limits =
383 pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);
384
385 /* Default value based on the minimum value found in all existing cores. */
386 const uint32_t max_multisample =
387 PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);
388
389 /* Default value based on the minimum value found in all existing cores. */
390 const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);
391
392 /* Default value based on the minimum value found in all existing cores. */
393 const uint32_t uvs_pba_entries =
394 PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);
395
396 /* Default value based on the minimum value found in all existing cores. */
397 const uint32_t num_user_clip_planes =
398 PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);
399
400 const uint32_t sub_pixel_precision =
401 PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;
402
403 const uint32_t max_render_size = rogue_get_render_size_max(dev_info);
404
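   /* Convert the maximum sample count into a VkSampleCountFlags mask of all
    * supported counts, e.g. 4 -> (4 << 1) - 1 = 0x7 = 1 | 2 | 4 samples.
    */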
405 const uint32_t max_sample_bits = ((max_multisample << 1) - 1);
406
407 const uint32_t max_user_vertex_components =
408 ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;
409
410 /* The workgroup invocation limit is determined by the compute barrier case:
411  * each slot has a fixed number of invocations, so a whole workgroup may need
412  * to span multiple slots. As each slot will WAIT at the barrier until the
413  * last invocation completes, all of them have to be schedulable at the same
414  * time.
415 *
416 * Typically all Rogue cores have 16 slots. Some of the smallest cores are
417 * reduced to 14.
418 *
419 * The compute barrier slot exhaustion scenario can be tested with:
420 * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
421 * .atomicwrite*guard*comp
422 */
423
424 /* Default value based on the minimum value found in all existing cores. */
425 const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);
426
427 /* Default value based on the minimum value found in all existing cores. */
428 const uint32_t max_instances_per_pds_task =
429 PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);
430
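   /* For example, 16 slots * 32 instances per PDS task = 512 invocations; the
    * smallest cores with 14 slots only reach 448 and therefore report 384.
    */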
431 const uint32_t max_compute_work_group_invocations =
432 (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
433
434 bool ret;
435
436 *properties = (struct vk_properties){
437 /* Vulkan 1.0 */
438 .apiVersion = PVR_API_VERSION,
439 .driverVersion = vk_get_driver_version(),
440 .vendorID = VK_VENDOR_ID_IMAGINATION,
441 .deviceID = dev_info->ident.device_id,
442 .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
443 /* deviceName and pipelineCacheUUID are filled in below. */
444
445 .maxImageDimension1D = max_render_size,
446 .maxImageDimension2D = max_render_size,
447 .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
448 .maxImageDimensionCube = max_render_size,
449 .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
450 .maxTexelBufferElements = 64U * 1024U,
451 .maxUniformBufferRange = 128U * 1024U * 1024U,
452 .maxStorageBufferRange = 128U * 1024U * 1024U,
453 .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
454 .maxMemoryAllocationCount = UINT32_MAX,
455 .maxSamplerAllocationCount = UINT32_MAX,
456 .bufferImageGranularity = 1U,
457 .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
458 /* Maximum number of descriptor sets that can be bound simultaneously. */
459 .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
460 .maxPerStageResources = descriptor_limits->max_per_stage_resources,
461 .maxPerStageDescriptorSamplers =
462 descriptor_limits->max_per_stage_samplers,
463 .maxPerStageDescriptorUniformBuffers =
464 descriptor_limits->max_per_stage_uniform_buffers,
465 .maxPerStageDescriptorStorageBuffers =
466 descriptor_limits->max_per_stage_storage_buffers,
467 .maxPerStageDescriptorSampledImages =
468 descriptor_limits->max_per_stage_sampled_images,
469 .maxPerStageDescriptorStorageImages =
470 descriptor_limits->max_per_stage_storage_images,
471 .maxPerStageDescriptorInputAttachments =
472 descriptor_limits->max_per_stage_input_attachments,
473 .maxDescriptorSetSamplers = 256U,
474 .maxDescriptorSetUniformBuffers = 256U,
475 .maxDescriptorSetUniformBuffersDynamic =
476 PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
477 .maxDescriptorSetStorageBuffers = 256U,
478 .maxDescriptorSetStorageBuffersDynamic =
479 PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
480 .maxDescriptorSetSampledImages = 256U,
481 .maxDescriptorSetStorageImages = 256U,
482 .maxDescriptorSetInputAttachments = 256U,
483
484 /* Vertex Shader Limits */
485 .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
486 .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
487 .maxVertexInputAttributeOffset = 0xFFFF,
488 .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
489 .maxVertexOutputComponents = max_user_vertex_components,
490
491 /* Tessellation Limits */
492 .maxTessellationGenerationLevel = 0,
493 .maxTessellationPatchSize = 0,
494 .maxTessellationControlPerVertexInputComponents = 0,
495 .maxTessellationControlPerVertexOutputComponents = 0,
496 .maxTessellationControlPerPatchOutputComponents = 0,
497 .maxTessellationControlTotalOutputComponents = 0,
498 .maxTessellationEvaluationInputComponents = 0,
499 .maxTessellationEvaluationOutputComponents = 0,
500
501 /* Geometry Shader Limits */
502 .maxGeometryShaderInvocations = 0,
503 .maxGeometryInputComponents = 0,
504 .maxGeometryOutputComponents = 0,
505 .maxGeometryOutputVertices = 0,
506 .maxGeometryTotalOutputComponents = 0,
507
508 /* Fragment Shader Limits */
509 .maxFragmentInputComponents = max_user_vertex_components,
510 .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
511 .maxFragmentDualSrcAttachments = 0,
512 .maxFragmentCombinedOutputResources =
513 descriptor_limits->max_per_stage_storage_buffers +
514 descriptor_limits->max_per_stage_storage_images +
515 PVR_MAX_COLOR_ATTACHMENTS,
516
517 /* Compute Shader Limits */
518 .maxComputeSharedMemorySize = 16U * 1024U,
519 .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
520 .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
521 .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
522 max_compute_work_group_invocations,
523 64U },
524
525 /* Rasterization Limits */
526 .subPixelPrecisionBits = sub_pixel_precision,
527 .subTexelPrecisionBits = 8U,
528 .mipmapPrecisionBits = 8U,
529
530 .maxDrawIndexedIndexValue = UINT32_MAX,
531 .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
532 .maxSamplerLodBias = 16.0f,
533 .maxSamplerAnisotropy = 1.0f,
534 .maxViewports = PVR_MAX_VIEWPORTS,
535
536 .maxViewportDimensions[0] = max_render_size,
537 .maxViewportDimensions[1] = max_render_size,
538 .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
539 .viewportBoundsRange[1] = 2U * max_render_size,
540
541 .viewportSubPixelBits = 0,
542 .minMemoryMapAlignment = 64U,
543 .minTexelBufferOffsetAlignment = 16U,
544 .minUniformBufferOffsetAlignment = 4U,
545 .minStorageBufferOffsetAlignment = 4U,
546
547 .minTexelOffset = -8,
548 .maxTexelOffset = 7U,
549 .minTexelGatherOffset = -8,
550 .maxTexelGatherOffset = 7,
551 .minInterpolationOffset = -0.5,
552 .maxInterpolationOffset = 0.5,
553 .subPixelInterpolationOffsetBits = 4U,
554
555 .maxFramebufferWidth = max_render_size,
556 .maxFramebufferHeight = max_render_size,
557 .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,
558
559 .framebufferColorSampleCounts = max_sample_bits,
560 .framebufferDepthSampleCounts = max_sample_bits,
561 .framebufferStencilSampleCounts = max_sample_bits,
562 .framebufferNoAttachmentsSampleCounts = max_sample_bits,
563 .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
564 .sampledImageColorSampleCounts = max_sample_bits,
565 .sampledImageIntegerSampleCounts = max_sample_bits,
566 .sampledImageDepthSampleCounts = max_sample_bits,
567 .sampledImageStencilSampleCounts = max_sample_bits,
568 .storageImageSampleCounts = max_sample_bits,
569 .maxSampleMaskWords = 1U,
570 .timestampComputeAndGraphics = false,
571 .timestampPeriod = 0.0f,
572 .maxClipDistances = num_user_clip_planes,
573 .maxCullDistances = num_user_clip_planes,
574 .maxCombinedClipAndCullDistances = num_user_clip_planes,
575 .discreteQueuePriorities = 2U,
576 .pointSizeRange[0] = 1.0f,
577 .pointSizeRange[1] = 511.0f,
578 .pointSizeGranularity = 0.0625f,
579 .lineWidthRange[0] = 1.0f / 16.0f,
580 .lineWidthRange[1] = 16.0f,
581 .lineWidthGranularity = 1.0f / 16.0f,
582 .strictLines = false,
583 .standardSampleLocations = true,
584 .optimalBufferCopyOffsetAlignment = 4U,
585 .optimalBufferCopyRowPitchAlignment = 4U,
586 .nonCoherentAtomSize = 1U,
587
588 /* Vulkan 1.2 / VK_KHR_driver_properties */
589 .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
590 .driverName = "Imagination open-source Mesa driver",
591 .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
592 .conformanceVersion = {
593 .major = 1,
594 .minor = 3,
595 .subminor = 4,
596 .patch = 1,
597 },
598
599 /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
600 .maxTimelineSemaphoreValueDifference = UINT64_MAX,
601
602 /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
603 .storageTexelBufferOffsetAlignmentBytes = 16,
604 .storageTexelBufferOffsetSingleTexelAlignment = true,
605 .uniformTexelBufferOffsetAlignmentBytes = 16,
606 .uniformTexelBufferOffsetSingleTexelAlignment = false,
607 };
608
609 snprintf(properties->deviceName,
610 sizeof(properties->deviceName),
611 "Imagination PowerVR %s %s",
612 dev_info->ident.series_name,
613 dev_info->ident.public_name);
614
615 ret = pvr_physical_device_init_pipeline_cache_uuid(
616 dev_info,
617 properties->pipelineCacheUUID);
618 if (!ret)
619 return false;
620
621 return true;
622 }
623
624 VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
625 {
626 *pApiVersion = PVR_API_VERSION;
627 return VK_SUCCESS;
628 }
629
630 VkResult
631 pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
632 uint32_t *pPropertyCount,
633 VkExtensionProperties *pProperties)
634 {
635 if (pLayerName)
636 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
637
638 return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
639 pPropertyCount,
640 pProperties);
641 }
642
643 static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
644 {
645 struct pvr_physical_device *pdevice =
646 container_of(vk_pdevice, struct pvr_physical_device, vk);
647
648 /* Be careful here. The device might not have been initialized. This can
649 * happen since initialization is done in vkEnumeratePhysicalDevices() but
650 * finish is done in vkDestroyInstance(). Make sure that you check for NULL
651 * before freeing or that the freeing functions accept NULL pointers.
652 */
653
654 if (pdevice->compiler)
655 ralloc_free(pdevice->compiler);
656
657 pvr_wsi_finish(pdevice);
658
659 if (pdevice->ws)
660 pvr_winsys_destroy(pdevice->ws);
661
662 vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
663 vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);
664
665 vk_physical_device_finish(&pdevice->vk);
666
667 vk_free(&pdevice->vk.instance->alloc, pdevice);
668 }
669
670 void pvr_DestroyInstance(VkInstance _instance,
671 const VkAllocationCallbacks *pAllocator)
672 {
673 PVR_FROM_HANDLE(pvr_instance, instance, _instance);
674
675 if (!instance)
676 return;
677
678 VG(VALGRIND_DESTROY_MEMPOOL(instance));
679
680 vk_instance_finish(&instance->vk);
681 vk_free(&instance->vk.alloc, instance);
682 }
683
684 static uint64_t pvr_compute_heap_size(void)
685 {
686 /* Query the total ram from the system */
687 uint64_t total_ram;
688 if (!os_get_total_physical_memory(&total_ram))
689 return 0;
690
691 /* We don't want to burn too much ram with the GPU. If the user has 4GiB
692 * or less, we use at most half. If they have more than 4GiB, we use 3/4.
693 */
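   /* For example, a 2 GiB system reports a 1 GiB heap and an 8 GiB system
    * reports a 6 GiB heap.
    */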
694 uint64_t available_ram;
695 if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
696 available_ram = total_ram / 2U;
697 else
698 available_ram = total_ram * 3U / 4U;
699
700 return available_ram;
701 }
702
703 static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
704 struct pvr_instance *instance,
705 drmDevicePtr drm_render_device,
706 drmDevicePtr drm_display_device)
707 {
708 struct vk_physical_device_dispatch_table dispatch_table;
709 struct vk_device_extension_table supported_extensions;
710 struct vk_properties supported_properties;
711 struct vk_features supported_features;
712 struct pvr_winsys *ws;
713 char *display_path;
714 char *render_path;
715 VkResult result;
716
717 if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
718 return vk_errorf(instance,
719 VK_ERROR_INCOMPATIBLE_DRIVER,
720 "WARNING: powervr is not a conformant Vulkan "
721 "implementation. Pass "
722 "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
723 "what you're doing.");
724 }
725
726 render_path = vk_strdup(&instance->vk.alloc,
727 drm_render_device->nodes[DRM_NODE_RENDER],
728 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
729 if (!render_path) {
730 result = VK_ERROR_OUT_OF_HOST_MEMORY;
731 goto err_out;
732 }
733
734 if (instance->vk.enabled_extensions.KHR_display) {
735 display_path = vk_strdup(&instance->vk.alloc,
736 drm_display_device->nodes[DRM_NODE_PRIMARY],
737 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
738 if (!display_path) {
739 result = VK_ERROR_OUT_OF_HOST_MEMORY;
740 goto err_vk_free_render_path;
741 }
742 } else {
743 display_path = NULL;
744 }
745
746 result =
747 pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
748 if (result != VK_SUCCESS)
749 goto err_vk_free_display_path;
750
751 pdevice->instance = instance;
752 pdevice->render_path = render_path;
753 pdevice->display_path = display_path;
754 pdevice->ws = ws;
755
756 result = ws->ops->device_info_init(ws,
757 &pdevice->dev_info,
758 &pdevice->dev_runtime_info);
759 if (result != VK_SUCCESS)
760 goto err_pvr_winsys_destroy;
761
762 pvr_physical_device_get_supported_extensions(&supported_extensions);
763 pvr_physical_device_get_supported_features(&pdevice->dev_info,
764 &supported_features);
765 if (!pvr_physical_device_get_properties(&pdevice->dev_info,
766 &pdevice->dev_runtime_info,
767 &supported_properties)) {
768 result = vk_errorf(instance,
769 VK_ERROR_INITIALIZATION_FAILED,
770 "Failed to collect physical device properties");
771 goto err_pvr_winsys_destroy;
772 }
773
774 vk_physical_device_dispatch_table_from_entrypoints(
775 &dispatch_table,
776 &pvr_physical_device_entrypoints,
777 true);
778
779 vk_physical_device_dispatch_table_from_entrypoints(
780 &dispatch_table,
781 &wsi_physical_device_entrypoints,
782 false);
783
784 result = vk_physical_device_init(&pdevice->vk,
785 &instance->vk,
786 &supported_extensions,
787 &supported_features,
788 &supported_properties,
789 &dispatch_table);
790 if (result != VK_SUCCESS)
791 goto err_pvr_winsys_destroy;
792
793 pdevice->vk.supported_sync_types = ws->sync_types;
794
795 /* Setup available memory heaps and types */
796 pdevice->memory.memoryHeapCount = 1;
797 pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
798 pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;
799
800 pdevice->memory.memoryTypeCount = 1;
801 pdevice->memory.memoryTypes[0].propertyFlags =
802 VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
803 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
804 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
805 pdevice->memory.memoryTypes[0].heapIndex = 0;
806
807 result = pvr_wsi_init(pdevice);
808 if (result != VK_SUCCESS) {
809 vk_error(instance, result);
810 goto err_vk_physical_device_finish;
811 }
812
813 pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
814 if (!pdevice->compiler) {
815 result = vk_errorf(instance,
816 VK_ERROR_INITIALIZATION_FAILED,
817 "Failed to initialize Rogue compiler");
818 goto err_wsi_finish;
819 }
820
821 return VK_SUCCESS;
822
823 err_wsi_finish:
824 pvr_wsi_finish(pdevice);
825
826 err_vk_physical_device_finish:
827 vk_physical_device_finish(&pdevice->vk);
828
829 err_pvr_winsys_destroy:
830 pvr_winsys_destroy(ws);
831
832 err_vk_free_display_path:
833 vk_free(&instance->vk.alloc, display_path);
834
835 err_vk_free_render_path:
836 vk_free(&instance->vk.alloc, render_path);
837
838 err_out:
839 return result;
840 }
841
842 static VkResult pvr_get_drm_devices(void *const obj,
843 drmDevicePtr *const devices,
844 const int max_devices,
845 int *const num_devices_out)
846 {
847 int ret = drmGetDevices2(0, devices, max_devices);
848 if (ret < 0) {
849 return vk_errorf(obj,
850 VK_ERROR_INITIALIZATION_FAILED,
851 "Failed to enumerate drm devices (errno %d: %s)",
852 -ret,
853 strerror(-ret));
854 }
855
856 if (num_devices_out)
857 *num_devices_out = ret;
858
859 return VK_SUCCESS;
860 }
861
862 static bool
863 pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
864 drmDevice *const drm_dev)
865 {
866 char **const compatible = drm_dev->deviceinfo.platform->compatible;
867
868 for (char **compat = compatible; *compat; compat++) {
869 if (strncmp(*compat, info->name, info->len) == 0)
870 return true;
871 }
872
873 return false;
874 }
875
876 static const struct pvr_drm_device_config *
877 pvr_drm_device_get_config(drmDevice *const drm_dev)
878 {
879 for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
880 if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
881 return &pvr_drm_configs[i];
882 }
883
884 return NULL;
885 }
886
887 static void
888 pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
889 char *const *comp_display,
890 char *const *comp_render)
891 {
892 drmVersionPtr version_display, version_render;
893 struct pvr_device_dump_info info;
894
895 version_display = drmGetVersion(pdevice->ws->display_fd);
896 if (!version_display)
897 return;
898
899 version_render = drmGetVersion(pdevice->ws->render_fd);
900 if (!version_render) {
901 drmFreeVersion(version_display);
902 return;
903 }
904
905 info.device_info = &pdevice->dev_info;
906 info.device_runtime_info = &pdevice->dev_runtime_info;
907 info.drm_display.patchlevel = version_display->version_patchlevel;
908 info.drm_display.major = version_display->version_major;
909 info.drm_display.minor = version_display->version_minor;
910 info.drm_display.name = version_display->name;
911 info.drm_display.date = version_display->date;
912 info.drm_display.comp = comp_display;
913 info.drm_render.patchlevel = version_render->version_patchlevel;
914 info.drm_render.major = version_render->version_major;
915 info.drm_render.minor = version_render->version_minor;
916 info.drm_render.name = version_render->name;
917 info.drm_render.date = version_render->date;
918 info.drm_render.comp = comp_render;
919
920 pvr_dump_physical_device_info(&info);
921
922 drmFreeVersion(version_display);
923 drmFreeVersion(version_render);
924 }
925
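/* Enumerate DRM platform devices, pick the first render node that matches a
 * supported config together with its companion display node, then initialize
 * a single pvr_physical_device. Missing or unsupported devices are not an
 * error; the instance simply ends up with no physical devices.
 */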
926 static VkResult
927 pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
928 {
929 struct pvr_instance *const instance =
930 container_of(vk_instance, struct pvr_instance, vk);
931
932 const struct pvr_drm_device_config *config = NULL;
933
934 drmDevicePtr drm_display_device = NULL;
935 drmDevicePtr drm_render_device = NULL;
936 struct pvr_physical_device *pdevice;
937 drmDevicePtr *drm_devices;
938 int num_drm_devices = 0;
939 VkResult result;
940
941 result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
942 if (result != VK_SUCCESS)
943 goto out;
944
945 if (num_drm_devices == 0) {
946 result = VK_SUCCESS;
947 goto out;
948 }
949
950 drm_devices = vk_alloc(&vk_instance->alloc,
951 sizeof(*drm_devices) * num_drm_devices,
952 8,
953 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
954 if (!drm_devices) {
955 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
956 goto out;
957 }
958
959 result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
960 if (result != VK_SUCCESS)
961 goto out_free_drm_device_ptrs;
962
963 /* First search for our render node... */
964 for (int i = 0; i < num_drm_devices; i++) {
965 drmDevice *const drm_dev = drm_devices[i];
966
967 if (drm_dev->bustype != DRM_BUS_PLATFORM)
968 continue;
969
970 if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
971 continue;
972
973 config = pvr_drm_device_get_config(drm_dev);
974 if (config) {
975 drm_render_device = drm_dev;
976 break;
977 }
978 }
979
980 if (!config) {
981 result = VK_SUCCESS;
982 goto out_free_drm_devices;
983 }
984
985 mesa_logd("Found compatible render device '%s'.",
986 drm_render_device->nodes[DRM_NODE_RENDER]);
987
988 /* ...then find the compatible display node. */
989 for (int i = 0; i < num_drm_devices; i++) {
990 drmDevice *const drm_dev = drm_devices[i];
991
992 if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
993 continue;
994
995 if (pvr_drm_device_compatible(&config->display, drm_dev)) {
996 drm_display_device = drm_dev;
997 break;
998 }
999 }
1000
1001 if (!drm_display_device) {
1002 mesa_loge("Render device '%s' has no compatible display device.",
1003 drm_render_device->nodes[DRM_NODE_RENDER]);
1004 result = VK_SUCCESS;
1005 goto out_free_drm_devices;
1006 }
1007
1008 mesa_logd("Found compatible display device '%s'.",
1009 drm_display_device->nodes[DRM_NODE_PRIMARY]);
1010
1011 pdevice = vk_zalloc(&vk_instance->alloc,
1012 sizeof(*pdevice),
1013 8,
1014 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1015 if (!pdevice) {
1016 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1017 goto out_free_drm_devices;
1018 }
1019
1020 result = pvr_physical_device_init(pdevice,
1021 instance,
1022 drm_render_device,
1023 drm_display_device);
1024 if (result != VK_SUCCESS) {
1025 if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
1026 result = VK_SUCCESS;
1027
1028 goto err_free_pdevice;
1029 }
1030
1031 if (PVR_IS_DEBUG_SET(INFO)) {
1032 pvr_physical_device_dump_info(
1033 pdevice,
1034 drm_display_device->deviceinfo.platform->compatible,
1035 drm_render_device->deviceinfo.platform->compatible);
1036 }
1037
1038 list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);
1039
1040 result = VK_SUCCESS;
1041 goto out_free_drm_devices;
1042
1043 err_free_pdevice:
1044 vk_free(&vk_instance->alloc, pdevice);
1045
1046 out_free_drm_devices:
1047 drmFreeDevices(drm_devices, num_drm_devices);
1048
1049 out_free_drm_device_ptrs:
1050 vk_free(&vk_instance->alloc, drm_devices);
1051
1052 out:
1053 return result;
1054 }
1055
1056 VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
1057 const VkAllocationCallbacks *pAllocator,
1058 VkInstance *pInstance)
1059 {
1060 struct vk_instance_dispatch_table dispatch_table;
1061 struct pvr_instance *instance;
1062 VkResult result;
1063
1064 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
1065
1066 if (!pAllocator)
1067 pAllocator = vk_default_allocator();
1068
1069 instance = vk_alloc(pAllocator,
1070 sizeof(*instance),
1071 8,
1072 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
1073 if (!instance)
1074 return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1075
1076 vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
1077 &pvr_instance_entrypoints,
1078 true);
1079
1080 vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
1081 &wsi_instance_entrypoints,
1082 false);
1083
1084 result = vk_instance_init(&instance->vk,
1085 &pvr_instance_extensions,
1086 &dispatch_table,
1087 pCreateInfo,
1088 pAllocator);
1089 if (result != VK_SUCCESS) {
1090 vk_free(pAllocator, instance);
1091 return result;
1092 }
1093
1094 pvr_process_debug_variable();
1095
1096 instance->active_device_count = 0;
1097
1098 instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
1099 instance->vk.physical_devices.destroy = pvr_physical_device_destroy;
1100
1101 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
1102
1103 *pInstance = pvr_instance_to_handle(instance);
1104
1105 return VK_SUCCESS;
1106 }
1107
1108 static uint32_t pvr_get_simultaneous_num_allocs(
1109 const struct pvr_device_info *dev_info,
1110 ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
1111 {
1112 uint32_t min_cluster_per_phantom;
1113
1114 if (PVR_HAS_FEATURE(dev_info, s8xe))
1115 return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
1116
1117 assert(dev_runtime_info->num_phantoms == 1);
1118 min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
1119
1120 if (min_cluster_per_phantom >= 4)
1121 return 1;
1122 else if (min_cluster_per_phantom == 2)
1123 return 2;
1124 else
1125 return 4;
1126 }
1127
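/* Dual-purpose helper: with fs_common_size == UINT32_MAX it returns the
 * largest per-instance common store (shared register) allocation that still
 * allows min_tiles_in_flight tiles, with fs_common_size == 0 it returns the
 * hardware maximum tiles in flight, and for any other size it returns how
 * many tiles can be in flight with that allocation.
 */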
1128 uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
1129 const struct pvr_device_info *dev_info,
1130 const struct pvr_device_runtime_info *dev_runtime_info,
1131 uint32_t fs_common_size,
1132 uint32_t min_tiles_in_flight)
1133 {
1134 const uint32_t available_shareds =
1135 dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
1136 const uint32_t max_tiles_in_flight =
1137 PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
1138 uint32_t num_tile_in_flight;
1139 uint32_t num_allocs;
1140
1141 if (fs_common_size == 0)
1142 return max_tiles_in_flight;
1143
1144 num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);
1145
1146 if (fs_common_size == UINT32_MAX) {
1147 uint32_t max_common_size = available_shareds;
1148
1149 num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);
1150
1151 if (!PVR_HAS_ERN(dev_info, 38748)) {
1152 /* Hardware needs space for one extra shared allocation. */
1153 num_allocs += 1;
1154 }
1155
1156 /* Double resource requirements to deal with fragmentation. */
1157 max_common_size /= num_allocs * 2;
1158 max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
1159 max_common_size =
1160 ROUND_DOWN_TO(max_common_size,
1161 PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));
1162
1163 return max_common_size;
1164 }
1165
1166 num_tile_in_flight = available_shareds / (fs_common_size * 2);
1167
1168 if (!PVR_HAS_ERN(dev_info, 38748))
1169 num_tile_in_flight -= 1;
1170
1171 num_tile_in_flight /= num_allocs;
1172
1173 #if defined(DEBUG)
1174 /* Validate the above result. */
1175
1176 assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
1177 num_allocs *= num_tile_in_flight;
1178
1179 if (!PVR_HAS_ERN(dev_info, 38748)) {
1180 /* Hardware needs space for one extra shared allocation. */
1181 num_allocs += 1;
1182 }
1183
1184 assert(fs_common_size <= available_shareds / (num_allocs * 2));
1185 #endif
1186
1187 return MIN2(num_tile_in_flight, max_tiles_in_flight);
1188 }
1189
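/* A single queue family exposes all PVR_MAX_QUEUES queues and supports
 * graphics, compute and transfer. timestampValidBits is 0 because timestamp
 * queries are not supported (see timestampComputeAndGraphics above).
 */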
1190 const static VkQueueFamilyProperties pvr_queue_family_properties = {
1191 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
1192 VK_QUEUE_TRANSFER_BIT,
1193 .queueCount = PVR_MAX_QUEUES,
1194 .timestampValidBits = 0,
1195 .minImageTransferGranularity = { 1, 1, 1 },
1196 };
1197
1198 static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
1199 {
1200 const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
1201 const uint64_t heap_used = pdevice->heap_used;
1202 uint64_t sys_available = 0, heap_available;
1203 ASSERTED bool has_available_memory =
1204 os_get_available_system_memory(&sys_available);
1205 assert(has_available_memory);
1206
1207 /* Let's not incite the app to starve the system: report at most 90% of
1208 * available system memory.
1209 */
1210 heap_available = sys_available * 9 / 10;
1211 return MIN2(heap_size, heap_used + heap_available);
1212 }
1213
1214 void pvr_GetPhysicalDeviceQueueFamilyProperties2(
1215 VkPhysicalDevice physicalDevice,
1216 uint32_t *pQueueFamilyPropertyCount,
1217 VkQueueFamilyProperties2 *pQueueFamilyProperties)
1218 {
1219 VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
1220 out,
1221 pQueueFamilyProperties,
1222 pQueueFamilyPropertyCount);
1223
1224 vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
1225 p->queueFamilyProperties = pvr_queue_family_properties;
1226
1227 vk_foreach_struct (ext, p->pNext) {
1228 pvr_debug_ignored_stype(ext->sType);
1229 }
1230 }
1231 }
1232
1233 void pvr_GetPhysicalDeviceMemoryProperties2(
1234 VkPhysicalDevice physicalDevice,
1235 VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
1236 {
1237 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
1238
1239 pMemoryProperties->memoryProperties = pdevice->memory;
1240
1241 vk_foreach_struct (ext, pMemoryProperties->pNext) {
1242 switch (ext->sType) {
1243 case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
1244 VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
1245 (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;
1246
1247 pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
1248 pMemoryBudget->heapUsage[0] = pdevice->heap_used;
1249
1250 for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
1251 pMemoryBudget->heapBudget[i] = 0u;
1252 pMemoryBudget->heapUsage[i] = 0u;
1253 }
1254 break;
1255 }
1256 default:
1257 pvr_debug_ignored_stype(ext->sType);
1258 break;
1259 }
1260 }
1261 }
1262
1263 PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
1264 const char *pName)
1265 {
1266 PVR_FROM_HANDLE(pvr_instance, instance, _instance);
1267 return vk_instance_get_proc_addr(&instance->vk,
1268 &pvr_instance_entrypoints,
1269 pName);
1270 }
1271
1272 /* With version 1+ of the loader interface the ICD should expose
1273 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
1274 * apps.
1275 */
1276 PUBLIC
1277 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
1278 vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
1279 {
1280 return pvr_GetInstanceProcAddr(instance, pName);
1281 }
1282
1283 VkResult pvr_pds_compute_shader_create_and_upload(
1284 struct pvr_device *device,
1285 struct pvr_pds_compute_shader_program *program,
1286 struct pvr_pds_upload *const pds_upload_out)
1287 {
1288 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1289 const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
1290 size_t staging_buffer_size;
1291 uint32_t *staging_buffer;
1292 uint32_t *data_buffer;
1293 uint32_t *code_buffer;
1294 VkResult result;
1295
1296 /* Calculate how much space we'll need for the compute shader PDS program.
1297 */
1298 pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
1299
1300 /* FIXME: Fix the below inconsistency of code size being in bytes whereas
1301 * data size being in dwords.
1302 */
1303 /* Code size is in bytes, data size in dwords. */
1304 staging_buffer_size =
1305 PVR_DW_TO_BYTES(program->data_size) + program->code_size;
1306
1307 staging_buffer = vk_alloc(&device->vk.alloc,
1308 staging_buffer_size,
1309 8U,
1310 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1311 if (!staging_buffer)
1312 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1313
1314 data_buffer = staging_buffer;
1315 code_buffer = pvr_pds_compute_shader(program,
1316 data_buffer,
1317 PDS_GENERATE_DATA_SEGMENT,
1318 dev_info);
1319
1320 pvr_pds_compute_shader(program,
1321 code_buffer,
1322 PDS_GENERATE_CODE_SEGMENT,
1323 dev_info);
1324
1325 result = pvr_gpu_upload_pds(device,
1326 data_buffer,
1327 program->data_size,
1328 PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
1329 code_buffer,
1330 program->code_size / sizeof(uint32_t),
1331 PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
1332 cache_line_size,
1333 pds_upload_out);
1334
1335 vk_free(&device->vk.alloc, staging_buffer);
1336
1337 return result;
1338 }
1339
1340 static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
1341 {
1342 struct pvr_pds_compute_shader_program program;
1343
1344 pvr_pds_compute_shader_program_init(&program);
1345 /* Fence kernel. */
1346 program.fence = true;
1347 program.clear_pds_barrier = true;
1348
1349 return pvr_pds_compute_shader_create_and_upload(
1350 device,
1351 &program,
1352 &device->pds_compute_fence_program);
1353 }
1354
1355 static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
1356 {
1357 struct pvr_pds_compute_shader_program program;
1358
1359 pvr_pds_compute_shader_program_init(&program);
1360 program.clear_pds_barrier = true;
1361
1362 return pvr_pds_compute_shader_create_and_upload(
1363 device,
1364 &program,
1365 &device->pds_compute_empty_program);
1366 }
1367
1368 static VkResult pvr_pds_idfwdf_programs_create_and_upload(
1369 struct pvr_device *device,
1370 pvr_dev_addr_t usc_addr,
1371 uint32_t shareds,
1372 uint32_t temps,
1373 pvr_dev_addr_t shareds_buffer_addr,
1374 struct pvr_pds_upload *const upload_out,
1375 struct pvr_pds_upload *const sw_compute_barrier_upload_out)
1376 {
1377 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
1378 struct pvr_pds_vertex_shader_sa_program program = {
1379 .kick_usc = true,
1380 .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
1381 };
1382 size_t staging_buffer_size;
1383 uint32_t *staging_buffer;
1384 VkResult result;
1385
1386 /* We'll need to DMA the shareds into the USC's Common Store. */
1387 program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
1388 program.dma_address,
1389 0,
1390 shareds,
1391 shareds_buffer_addr.addr,
1392 false,
1393 dev_info);
1394
1395 /* DMA temp regs. */
1396 pvr_pds_setup_doutu(&program.usc_task_control,
1397 usc_addr.addr,
1398 temps,
1399 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
1400 false);
1401
1402 pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);
1403
1404 staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
1405
1406 staging_buffer = vk_alloc(&device->vk.alloc,
1407 staging_buffer_size,
1408 8,
1409 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1410 if (!staging_buffer)
1411 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1412
1413 /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
1414 pvr_pds_vertex_shader_sa(&program,
1415 staging_buffer,
1416 PDS_GENERATE_DATA_SEGMENT,
1417 dev_info);
1418 pvr_pds_vertex_shader_sa(&program,
1419 &staging_buffer[program.data_size],
1420 PDS_GENERATE_CODE_SEGMENT,
1421 dev_info);
1422
1423 /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
1424 * is bigger so we handle it first (if needed) and realloc() for a smaller
1425 * size.
1426 */
1427 if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
1428 /* FIXME: Figure out the define for alignment of 16. */
1429 result = pvr_gpu_upload_pds(device,
1430 &staging_buffer[0],
1431 program.data_size,
1432 16,
1433 &staging_buffer[program.data_size],
1434 program.code_size,
1435 16,
1436 16,
1437 sw_compute_barrier_upload_out);
1438 if (result != VK_SUCCESS) {
1439 vk_free(&device->vk.alloc, staging_buffer);
1440 return result;
1441 }
1442
1443 program.clear_pds_barrier = false;
1444
1445 pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);
1446
1447 staging_buffer_size =
1448 PVR_DW_TO_BYTES(program.code_size + program.data_size);
1449
1450 staging_buffer = vk_realloc(&device->vk.alloc,
1451 staging_buffer,
1452 staging_buffer_size,
1453 8,
1454 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1455 if (!staging_buffer) {
1456 pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);
1457
1458 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1459 }
1460
1461 /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
1462 pvr_pds_vertex_shader_sa(&program,
1463 staging_buffer,
1464 PDS_GENERATE_DATA_SEGMENT,
1465 dev_info);
1466 pvr_pds_vertex_shader_sa(&program,
1467 &staging_buffer[program.data_size],
1468 PDS_GENERATE_CODE_SEGMENT,
1469 dev_info);
1470 } else {
1471 *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
1472 .pvr_bo = NULL,
1473 };
1474 }
1475
1476 /* FIXME: Figure out the define for alignment of 16. */
1477 result = pvr_gpu_upload_pds(device,
1478 &staging_buffer[0],
1479 program.data_size,
1480 16,
1481 &staging_buffer[program.data_size],
1482 program.code_size,
1483 16,
1484 16,
1485 upload_out);
1486 if (result != VK_SUCCESS) {
1487 vk_free(&device->vk.alloc, staging_buffer);
1488 pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);
1489
1490 return result;
1491 }
1492
1493 vk_free(&device->vk.alloc, staging_buffer);
1494
1495 return VK_SUCCESS;
1496 }
1497
1498 static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
1499 {
1500 uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
1501 uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
1502 struct util_dynarray usc_program;
1503 struct pvr_texture_state_info tex_info;
1504 uint32_t *dword_ptr;
1505 uint32_t usc_shareds;
1506 uint32_t usc_temps;
1507 VkResult result;
1508
1509 util_dynarray_init(&usc_program, NULL);
1510 pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
1511 &usc_program,
1512 &usc_shareds,
1513 &usc_temps);
1514
1515 device->idfwdf_state.usc_shareds = usc_shareds;
1516
1517 /* FIXME: Figure out the define for alignment of 16. */
1518 result = pvr_gpu_upload_usc(device,
1519 usc_program.data,
1520 usc_program.size,
1521 16,
1522 &device->idfwdf_state.usc);
1523 util_dynarray_fini(&usc_program);
1524
1525 if (result != VK_SUCCESS)
1526 return result;
1527
1528 /* TODO: Get the store buffer size from the compiler? */
1529 /* TODO: How was the size derived here? */
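   /* The 128 bytes requested here match the 4x2 texel
    * VK_FORMAT_R32G32B32A32_SFLOAT image described by tex_info below
    * (16 bytes per texel), which is presumably where the size comes from.
    */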
1530 result = pvr_bo_alloc(device,
1531 device->heaps.general_heap,
1532 4 * sizeof(float) * 4 * 2,
1533 4,
1534 0,
1535 &device->idfwdf_state.store_bo);
1536 if (result != VK_SUCCESS)
1537 goto err_free_usc_program;
1538
1539 result = pvr_bo_alloc(device,
1540 device->heaps.general_heap,
1541 usc_shareds * ROGUE_REG_SIZE_BYTES,
1542 ROGUE_REG_SIZE_BYTES,
1543 PVR_BO_ALLOC_FLAG_CPU_MAPPED,
1544 &device->idfwdf_state.shareds_bo);
1545 if (result != VK_SUCCESS)
1546 goto err_free_store_buffer;
1547
1548 /* Pack state words. */
1549
1550 pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
1551 sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
1552 sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
1553 sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1554 sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1555 }
1556
1557 /* clang-format off */
1558 pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
1559 /* clang-format on */
1560
1561 STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);
1562
1563 tex_info = (struct pvr_texture_state_info){
1564 .format = VK_FORMAT_R32G32B32A32_SFLOAT,
1565 .mem_layout = PVR_MEMLAYOUT_LINEAR,
1566 .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
1567 .type = VK_IMAGE_VIEW_TYPE_2D,
1568 .extent = { .width = 4, .height = 2, .depth = 0 },
1569 .mip_levels = 1,
1570 .sample_count = 1,
1571 .stride = 4,
1572 .swizzle = { PIPE_SWIZZLE_X,
1573 PIPE_SWIZZLE_Y,
1574 PIPE_SWIZZLE_Z,
1575 PIPE_SWIZZLE_W },
1576 .addr = device->idfwdf_state.store_bo->vma->dev_addr,
1577 };
1578
1579 result = pvr_pack_tex_state(device, &tex_info, image_state);
1580 if (result != VK_SUCCESS)
1581 goto err_free_shareds_buffer;
1582
1583 /* Fill the shareds buffer. */
1584
1585 dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;
1586
1587 #define HIGH_32(val) ((uint32_t)((val) >> 32U))
1588 #define LOW_32(val) ((uint32_t)(val))
1589
1590 /* TODO: Should we use compiler info to setup the shareds data instead of
1591 * assuming there's always 12 and this is how they should be setup?
1592 */
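   /* Shareds layout (12 dwords): [0..1] store buffer device address (hi/lo),
    * [2..3] padding for 128-bit alignment, [4..7] image state words,
    * [8..11] sampler state words.
    */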
1593
1594 dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1595 dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1596
1597 /* Pad the shareds as the texture/sampler state words are 128-bit aligned. */
1598 dword_ptr[2] = 0U;
1599 dword_ptr[3] = 0U;
1600
1601 dword_ptr[4] = LOW_32(image_state[0]);
1602 dword_ptr[5] = HIGH_32(image_state[0]);
1603 dword_ptr[6] = LOW_32(image_state[1]);
1604 dword_ptr[7] = HIGH_32(image_state[1]);
1605
1606 dword_ptr[8] = LOW_32(sampler_state[0]);
1607 dword_ptr[9] = HIGH_32(sampler_state[0]);
1608 dword_ptr[10] = LOW_32(sampler_state[1]);
1609 dword_ptr[11] = HIGH_32(sampler_state[1]);
1610 assert(11 + 1 == usc_shareds);
1611
1612 #undef HIGH_32
1613 #undef LOW_32
1614
1615 pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
1616 dword_ptr = NULL;
1617
1618 /* Generate and upload PDS programs. */
1619 result = pvr_pds_idfwdf_programs_create_and_upload(
1620 device,
1621 device->idfwdf_state.usc->dev_addr,
1622 usc_shareds,
1623 usc_temps,
1624 device->idfwdf_state.shareds_bo->vma->dev_addr,
1625 &device->idfwdf_state.pds,
1626 &device->idfwdf_state.sw_compute_barrier_pds);
1627 if (result != VK_SUCCESS)
1628 goto err_free_shareds_buffer;
1629
1630 return VK_SUCCESS;
1631
1632 err_free_shareds_buffer:
1633 pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1634
1635 err_free_store_buffer:
1636 pvr_bo_free(device, device->idfwdf_state.store_bo);
1637
1638 err_free_usc_program:
1639 pvr_bo_suballoc_free(device->idfwdf_state.usc);
1640
1641 return result;
1642 }
1643
1644 static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
1645 {
1646 pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
1647 pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
1648 pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1649 pvr_bo_free(device, device->idfwdf_state.store_bo);
1650 pvr_bo_suballoc_free(device->idfwdf_state.usc);
1651 }
1652
1653 /* FIXME: We should be calculating the size when we upload the code in
1654 * pvr_srv_setup_static_pixel_event_program().
1655 */
1656 static void pvr_device_get_pixel_event_pds_program_data_size(
1657 const struct pvr_device_info *dev_info,
1658 uint32_t *const data_size_in_dwords_out)
1659 {
1660 struct pvr_pds_event_program program = {
1661 /* No data to DMA, just a DOUTU needed. */
1662 .num_emit_word_pairs = 0,
1663 };
1664
1665 pvr_pds_set_sizes_pixel_event(&program, dev_info);
1666
1667 *data_size_in_dwords_out = program.data_size;
1668 }
1669
1670 static VkResult pvr_device_init_nop_program(struct pvr_device *device)
1671 {
1672 const uint32_t cache_line_size =
1673 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1674 struct pvr_pds_kickusc_program program = { 0 };
1675 struct util_dynarray nop_usc_bin;
1676 uint32_t staging_buffer_size;
1677 uint32_t *staging_buffer;
1678 VkResult result;
1679
1680 pvr_uscgen_nop(&nop_usc_bin);
1681
1682 result = pvr_gpu_upload_usc(device,
1683 util_dynarray_begin(&nop_usc_bin),
1684 nop_usc_bin.size,
1685 cache_line_size,
1686 &device->nop_program.usc);
1687 util_dynarray_fini(&nop_usc_bin);
1688 if (result != VK_SUCCESS)
1689 return result;
1690
1691 /* Setup a PDS program that kicks the static USC program. */
1692 pvr_pds_setup_doutu(&program.usc_task_control,
1693 device->nop_program.usc->dev_addr.addr,
1694 0U,
1695 PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
1696 false);
1697
1698 pvr_pds_set_sizes_pixel_shader(&program);
1699
1700 staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
1701
1702 staging_buffer = vk_alloc(&device->vk.alloc,
1703 staging_buffer_size,
1704 8U,
1705 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1706 if (!staging_buffer) {
1707 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1708 goto err_free_nop_usc_bo;
1709 }
1710
1711 pvr_pds_generate_pixel_shader_program(&program, staging_buffer);
1712
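   /* The staging buffer holds the PDS data segment followed immediately by the
    * code segment, hence the code pointer below is offset by data_size dwords.
    */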
1713 /* FIXME: Figure out the define for alignment of 16. */
1714 result = pvr_gpu_upload_pds(device,
1715 staging_buffer,
1716 program.data_size,
1717 16U,
1718 &staging_buffer[program.data_size],
1719 program.code_size,
1720 16U,
1721 16U,
1722 &device->nop_program.pds);
1723 if (result != VK_SUCCESS)
1724 goto err_free_staging_buffer;
1725
1726 vk_free(&device->vk.alloc, staging_buffer);
1727
1728 return VK_SUCCESS;
1729
1730 err_free_staging_buffer:
1731 vk_free(&device->vk.alloc, staging_buffer);
1732
1733 err_free_nop_usc_bo:
1734 pvr_bo_suballoc_free(device->nop_program.usc);
1735
1736 return result;
1737 }
1738
1739 static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
1740 {
1741 simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);
1742
1743 for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
1744 device->tile_buffer_state.buffers[i] = NULL;
1745
1746 device->tile_buffer_state.buffer_count = 0;
1747 }
1748
1749 static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
1750 {
1751 /* Destroy the mutex first so that asserts fire if it's still locked. This
1752  * keeps us from putting things in an inconsistent state by freeing buffers
1753  * that might still be in use, or by freeing buffers while new ones are being
1754  * allocated.
1755  */
1756 simple_mtx_destroy(&device->tile_buffer_state.mtx);
1757
1758 for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
1759 pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
1760 }
1761
1762 /**
1763  * \brief Ensures that a certain number of tile buffers are allocated.
1764 *
1765  * Makes sure that at least \p capacity tile buffers are allocated. If fewer
1766  * are present, new buffers of \p size_in_bytes each are appended to reach the quota.
1767 */
1768 VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
1769 uint32_t capacity,
1770 uint32_t size_in_bytes)
1771 {
1772 struct pvr_device_tile_buffer_state *tile_buffer_state =
1773 &device->tile_buffer_state;
1774 const uint32_t cache_line_size =
1775 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1776 VkResult result;
1777
1778 simple_mtx_lock(&tile_buffer_state->mtx);
1779
1780 /* Clamping in release and asserting in debug. */
1781 assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
1782 capacity = CLAMP(capacity,
1783 tile_buffer_state->buffer_count,
1784 ARRAY_SIZE(tile_buffer_state->buffers));
1785
1786 /* TODO: Implement bo multialloc? To reduce the amount of syscalls and
1787 * allocations.
1788 */
1789 for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
1790 result = pvr_bo_alloc(device,
1791 device->heaps.general_heap,
1792 size_in_bytes,
1793 cache_line_size,
1794 0,
1795 &tile_buffer_state->buffers[i]);
1796 if (result != VK_SUCCESS) {
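         /* Only free the buffers allocated by this call; buffers that already
          * existed stay valid.
          */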
1797 for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
1798 pvr_bo_free(device, tile_buffer_state->buffers[j]);
1799
1800 goto err_release_lock;
1801 }
1802 }
1803
1804 tile_buffer_state->buffer_count = capacity;
1805
1806 simple_mtx_unlock(&tile_buffer_state->mtx);
1807
1808 return VK_SUCCESS;
1809
1810 err_release_lock:
1811 simple_mtx_unlock(&tile_buffer_state->mtx);
1812
1813 return result;
1814 }
1815
1816 static void pvr_device_init_default_sampler_state(struct pvr_device *device)
1817 {
1818 pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
1819 sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1820 sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1821 sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1822 sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
1823 sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
1824 sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
1825 sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
1826 sampler.non_normalized_coords = true;
1827 }
1828 }
1829
1830 VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
1831 const VkDeviceCreateInfo *pCreateInfo,
1832 const VkAllocationCallbacks *pAllocator,
1833 VkDevice *pDevice)
1834 {
1835 PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
1836 uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
1837 struct pvr_instance *instance = pdevice->instance;
1838 struct vk_device_dispatch_table dispatch_table;
1839 struct pvr_device *device;
1840 struct pvr_winsys *ws;
1841 VkResult result;
1842
1843 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1844
1845 result = pvr_winsys_create(pdevice->render_path,
1846 pdevice->display_path,
1847 pAllocator ? pAllocator : &instance->vk.alloc,
1848 &ws);
1849 if (result != VK_SUCCESS)
1850 goto err_out;
1851
1852 device = vk_alloc2(&instance->vk.alloc,
1853 pAllocator,
1854 sizeof(*device),
1855 8,
1856 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1857 if (!device) {
1858 result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1859 goto err_pvr_winsys_destroy;
1860 }
1861
1862 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1863 &pvr_device_entrypoints,
1864 true);
1865
1866 vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1867 &wsi_device_entrypoints,
1868 false);
1869
1870 result = vk_device_init(&device->vk,
1871 &pdevice->vk,
1872 &dispatch_table,
1873 pCreateInfo,
1874 pAllocator);
1875 if (result != VK_SUCCESS)
1876 goto err_free_device;
1877
1878 device->instance = instance;
1879 device->pdevice = pdevice;
1880 device->ws = ws;
1881
1882 vk_device_set_drm_fd(&device->vk, ws->render_fd);
1883
1884 if (ws->features.supports_threaded_submit) {
1885 /* Queue submission can be blocked if the kernel CCBs become full,
1886 * so enable threaded submit to not block the submitter.
1887 */
1888 vk_device_enable_threaded_submit(&device->vk);
1889 }
1890
1891 ws->ops->get_heaps_info(ws, &device->heaps);
1892
1893 result = pvr_bo_store_create(device);
1894 if (result != VK_SUCCESS)
1895 goto err_vk_device_finish;
1896
1897 pvr_bo_suballocator_init(&device->suballoc_general,
1898 device->heaps.general_heap,
1899 device,
1900 PVR_SUBALLOCATOR_GENERAL_SIZE);
1901 pvr_bo_suballocator_init(&device->suballoc_pds,
1902 device->heaps.pds_heap,
1903 device,
1904 PVR_SUBALLOCATOR_PDS_SIZE);
1905 pvr_bo_suballocator_init(&device->suballoc_transfer,
1906 device->heaps.transfer_frag_heap,
1907 device,
1908 PVR_SUBALLOCATOR_TRANSFER_SIZE);
1909 pvr_bo_suballocator_init(&device->suballoc_usc,
1910 device->heaps.usc_heap,
1911 device,
1912 PVR_SUBALLOCATOR_USC_SIZE);
1913 pvr_bo_suballocator_init(&device->suballoc_vis_test,
1914 device->heaps.vis_test_heap,
1915 device,
1916 PVR_SUBALLOCATOR_VIS_TEST_SIZE);
1917
1918 if (p_atomic_inc_return(&instance->active_device_count) >
1919 PVR_SECONDARY_DEVICE_THRESHOLD) {
1920 initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
1921 }
1922
1923 result = pvr_free_list_create(device,
1924 initial_free_list_size,
1925 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
1926 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
1927 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
1928 NULL /* parent_free_list */,
1929 &device->global_free_list);
1930 if (result != VK_SUCCESS)
1931 goto err_dec_device_count;
1932
1933 result = pvr_device_init_nop_program(device);
1934 if (result != VK_SUCCESS)
1935 goto err_pvr_free_list_destroy;
1936
1937 result = pvr_device_init_compute_fence_program(device);
1938 if (result != VK_SUCCESS)
1939 goto err_pvr_free_nop_program;
1940
1941 result = pvr_device_init_compute_empty_program(device);
1942 if (result != VK_SUCCESS)
1943 goto err_pvr_free_compute_fence;
1944
1945 result = pvr_device_create_compute_query_programs(device);
1946 if (result != VK_SUCCESS)
1947 goto err_pvr_free_compute_empty;
1948
1949 result = pvr_device_init_compute_idfwdf_state(device);
1950 if (result != VK_SUCCESS)
1951 goto err_pvr_destroy_compute_query_programs;
1952
1953 result = pvr_device_init_graphics_static_clear_state(device);
1954 if (result != VK_SUCCESS)
1955 goto err_pvr_finish_compute_idfwdf;
1956
1957 result = pvr_device_init_spm_load_state(device);
1958 if (result != VK_SUCCESS)
1959 goto err_pvr_finish_graphics_static_clear_state;
1960
1961 pvr_device_init_tile_buffer_state(device);
1962
1963 result = pvr_queues_create(device, pCreateInfo);
1964 if (result != VK_SUCCESS)
1965 goto err_pvr_finish_tile_buffer_state;
1966
1967 pvr_device_init_default_sampler_state(device);
1968
1969 pvr_spm_init_scratch_buffer_store(device);
1970
1971 result = pvr_init_robustness_buffer(device);
1972 if (result != VK_SUCCESS)
1973 goto err_pvr_spm_finish_scratch_buffer_store;
1974
1975 result = pvr_border_color_table_init(&device->border_color_table, device);
1976 if (result != VK_SUCCESS)
1977 goto err_pvr_robustness_buffer_finish;
1978
1979 /* FIXME: Move this to a later stage and possibly somewhere other than
1980 * pvr_device. The purpose of this is so that we don't have to get the size
1981 * on each kick.
1982 */
1983 pvr_device_get_pixel_event_pds_program_data_size(
1984 &pdevice->dev_info,
1985 &device->pixel_event_data_size_in_dwords);
1986
1987 device->global_cmd_buffer_submit_count = 0;
1988 device->global_queue_present_count = 0;
1989
1990 *pDevice = pvr_device_to_handle(device);
1991
1992 return VK_SUCCESS;
1993
1994 err_pvr_robustness_buffer_finish:
1995 pvr_robustness_buffer_finish(device);
1996
1997 err_pvr_spm_finish_scratch_buffer_store:
1998 pvr_spm_finish_scratch_buffer_store(device);
1999
2000 pvr_queues_destroy(device);
2001
2002 err_pvr_finish_tile_buffer_state:
2003 pvr_device_finish_tile_buffer_state(device);
2004 pvr_device_finish_spm_load_state(device);
2005
2006 err_pvr_finish_graphics_static_clear_state:
2007 pvr_device_finish_graphics_static_clear_state(device);
2008
2009 err_pvr_finish_compute_idfwdf:
2010 pvr_device_finish_compute_idfwdf_state(device);
2011
2012 err_pvr_destroy_compute_query_programs:
2013 pvr_device_destroy_compute_query_programs(device);
2014
2015 err_pvr_free_compute_empty:
2016 pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2017
2018 err_pvr_free_compute_fence:
2019 pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2020
2021 err_pvr_free_nop_program:
2022 pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2023 pvr_bo_suballoc_free(device->nop_program.usc);
2024
2025 err_pvr_free_list_destroy:
2026 pvr_free_list_destroy(device->global_free_list);
2027
2028 err_dec_device_count:
2029 p_atomic_dec(&device->instance->active_device_count);
2030
2031 pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2032 pvr_bo_suballocator_fini(&device->suballoc_usc);
2033 pvr_bo_suballocator_fini(&device->suballoc_transfer);
2034 pvr_bo_suballocator_fini(&device->suballoc_pds);
2035 pvr_bo_suballocator_fini(&device->suballoc_general);
2036
2037 pvr_bo_store_destroy(device);
2038
2039 err_vk_device_finish:
2040 vk_device_finish(&device->vk);
2041
2042 err_free_device:
2043 vk_free(&device->vk.alloc, device);
2044
2045 err_pvr_winsys_destroy:
2046 pvr_winsys_destroy(ws);
2047
2048 err_out:
2049 return result;
2050 }
2051
2052 void pvr_DestroyDevice(VkDevice _device,
2053 const VkAllocationCallbacks *pAllocator)
2054 {
2055 PVR_FROM_HANDLE(pvr_device, device, _device);
2056
2057 if (!device)
2058 return;
2059
2060 pvr_border_color_table_finish(&device->border_color_table, device);
2061 pvr_robustness_buffer_finish(device);
2062 pvr_spm_finish_scratch_buffer_store(device);
2063 pvr_queues_destroy(device);
2064 pvr_device_finish_tile_buffer_state(device);
2065 pvr_device_finish_spm_load_state(device);
2066 pvr_device_finish_graphics_static_clear_state(device);
2067 pvr_device_finish_compute_idfwdf_state(device);
2068 pvr_device_destroy_compute_query_programs(device);
2069 pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2070 pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2071 pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2072 pvr_bo_suballoc_free(device->nop_program.usc);
2073 pvr_free_list_destroy(device->global_free_list);
2074 pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2075 pvr_bo_suballocator_fini(&device->suballoc_usc);
2076 pvr_bo_suballocator_fini(&device->suballoc_transfer);
2077 pvr_bo_suballocator_fini(&device->suballoc_pds);
2078 pvr_bo_suballocator_fini(&device->suballoc_general);
2079 pvr_bo_store_destroy(device);
2080 pvr_winsys_destroy(device->ws);
2081 p_atomic_dec(&device->instance->active_device_count);
2082 vk_device_finish(&device->vk);
2083 vk_free(&device->vk.alloc, device);
2084 }
2085
2086 VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2087 VkLayerProperties *pProperties)
2088 {
2089 if (!pProperties) {
2090 *pPropertyCount = 0;
2091 return VK_SUCCESS;
2092 }
2093
2094 return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2095 }
2096
2097 static void free_memory(struct pvr_device *device,
2098 struct pvr_device_memory *mem,
2099 const VkAllocationCallbacks *pAllocator)
2100 {
2101 if (!mem)
2102 return;
2103
2104 /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
2105 * If a memory object is mapped at the time it is freed, it is implicitly
2106 * unmapped.
2107 */
2108 if (mem->bo->map)
2109 device->ws->ops->buffer_unmap(mem->bo);
2110
2111 p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);
2112
2113 device->ws->ops->buffer_destroy(mem->bo);
2114
2115 vk_object_free(&device->vk, pAllocator, mem);
2116 }
2117
2118 VkResult pvr_AllocateMemory(VkDevice _device,
2119 const VkMemoryAllocateInfo *pAllocateInfo,
2120 const VkAllocationCallbacks *pAllocator,
2121 VkDeviceMemory *pMem)
2122 {
2123 const VkImportMemoryFdInfoKHR *fd_info = NULL;
2124 PVR_FROM_HANDLE(pvr_device, device, _device);
2125 enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
2126 struct pvr_device_memory *mem;
2127 uint64_t heap_used;
2128 VkResult result;
2129
2130 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2131 assert(pAllocateInfo->allocationSize > 0);
2132
2133 mem = vk_object_alloc(&device->vk,
2134 pAllocator,
2135 sizeof(*mem),
2136 VK_OBJECT_TYPE_DEVICE_MEMORY);
2137 if (!mem)
2138 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2139
2140 vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
2141 switch ((unsigned)ext->sType) {
2142 case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
2143 if (device->ws->display_fd >= 0)
2144 type = PVR_WINSYS_BO_TYPE_DISPLAY;
2145 break;
2146 case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
2147 fd_info = (void *)ext;
2148 break;
2149 default:
2150 pvr_debug_ignored_stype(ext->sType);
2151 break;
2152 }
2153 }
2154
2155 if (fd_info && fd_info->handleType) {
2156 VkDeviceSize aligned_alloc_size =
2157 ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
2158
2159 assert(
2160 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2161 fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2162
2163 result = device->ws->ops->buffer_create_from_fd(device->ws,
2164 fd_info->fd,
2165 &mem->bo);
2166 if (result != VK_SUCCESS)
2167 goto err_vk_object_free_mem;
2168
2169 /* For security purposes, we reject importing the bo if it's smaller
2170 * than the requested allocation size. This prevents a malicious client
2171 * from passing a buffer to a trusted client, lying about the size, and
2172 * telling the trusted client to try and texture from an image that goes
2173 * out-of-bounds. This sort of thing could lead to GPU hangs or worse
2174 * in the trusted client. The trusted client can protect itself against
2175 * this sort of attack but only if it can trust the buffer size.
2176 */
2177 if (aligned_alloc_size > mem->bo->size) {
2178 result = vk_errorf(device,
2179 VK_ERROR_INVALID_EXTERNAL_HANDLE,
2180 "Aligned requested size too large for the given fd "
2181 "%" PRIu64 "B > %" PRIu64 "B",
2182 pAllocateInfo->allocationSize,
2183 mem->bo->size);
2184 device->ws->ops->buffer_destroy(mem->bo);
2185 goto err_vk_object_free_mem;
2186 }
2187
2188 /* From the Vulkan spec:
2189 *
2190 * "Importing memory from a file descriptor transfers ownership of
2191 * the file descriptor from the application to the Vulkan
2192 * implementation. The application must not perform any operations on
2193 * the file descriptor after a successful import."
2194 *
2195 * If the import fails, we leave the file descriptor open.
2196 */
2197 close(fd_info->fd);
2198 } else {
2199 /* Align physical allocations to the page size of the heap that will be
2200 * used when binding device memory (see pvr_bind_memory()) to ensure the
2201 * entire allocation can be mapped.
2202 */
2203 const uint64_t alignment = device->heaps.general_heap->page_size;
2204
2205 /* FIXME: Need to determine the flags based on
2206 * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
2207 *
2208 * The alternative would be to store the flags alongside the memory
2209 * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
2210 * that they can be looked up.
2211 */
2212 result = device->ws->ops->buffer_create(device->ws,
2213 pAllocateInfo->allocationSize,
2214 alignment,
2215 type,
2216 PVR_WINSYS_BO_FLAG_CPU_ACCESS,
2217 &mem->bo);
2218 if (result != VK_SUCCESS)
2219 goto err_vk_object_free_mem;
2220 }
2221
2222 heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
2223 if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
2224 free_memory(device, mem, pAllocator);
2225 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2226 }
2227
2228 *pMem = pvr_device_memory_to_handle(mem);
2229
2230 return VK_SUCCESS;
2231
2232 err_vk_object_free_mem:
2233 vk_object_free(&device->vk, pAllocator, mem);
2234
2235 return result;
2236 }
2237
2238 VkResult pvr_GetMemoryFdKHR(VkDevice _device,
2239 const VkMemoryGetFdInfoKHR *pGetFdInfo,
2240 int *pFd)
2241 {
2242 PVR_FROM_HANDLE(pvr_device, device, _device);
2243 PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);
2244
2245 assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2246
2247 assert(
2248 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2249 pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2250
2251 return device->ws->ops->buffer_get_fd(mem->bo, pFd);
2252 }
2253
2254 VkResult
2255 pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
2256 VkExternalMemoryHandleTypeFlagBits handleType,
2257 int fd,
2258 VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2259 {
2260 PVR_FROM_HANDLE(pvr_device, device, _device);
2261
2262 switch (handleType) {
2263 case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
2264 /* FIXME: This should only allow memory types having
2265 * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
2266 * dma-buf should be imported using cacheable memory types,
2267      * given the exporter's mmap will always map it as cacheable.
2268 * Ref:
2269 * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
2270 */
2271 pMemoryFdProperties->memoryTypeBits =
2272 (1 << device->pdevice->memory.memoryTypeCount) - 1;
2273 return VK_SUCCESS;
2274 default:
2275 return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2276 }
2277 }
2278
2279 void pvr_FreeMemory(VkDevice _device,
2280 VkDeviceMemory _mem,
2281 const VkAllocationCallbacks *pAllocator)
2282 {
2283 PVR_FROM_HANDLE(pvr_device, device, _device);
2284 PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
2285
2286 free_memory(device, mem, pAllocator);
2287 }
2288
2289 VkResult pvr_MapMemory(VkDevice _device,
2290 VkDeviceMemory _memory,
2291 VkDeviceSize offset,
2292 VkDeviceSize size,
2293 VkMemoryMapFlags flags,
2294 void **ppData)
2295 {
2296 PVR_FROM_HANDLE(pvr_device, device, _device);
2297 PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2298 VkResult result;
2299
2300 if (!mem) {
2301 *ppData = NULL;
2302 return VK_SUCCESS;
2303 }
2304
2305 if (size == VK_WHOLE_SIZE)
2306 size = mem->bo->size - offset;
2307
2308 /* From the Vulkan spec version 1.0.32 docs for MapMemory:
2309 *
2310     * * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
2312 * * If size is not equal to VK_WHOLE_SIZE, size must be less than or
2313 * equal to the size of the memory minus offset
2314 */
2315
2316 assert(size > 0);
2317 assert(offset + size <= mem->bo->size);
2318
2319 /* Check if already mapped */
2320 if (mem->bo->map) {
2321 *ppData = (uint8_t *)mem->bo->map + offset;
2322 return VK_SUCCESS;
2323 }
2324
2325 /* Map it all at once */
2326 result = device->ws->ops->buffer_map(mem->bo);
2327 if (result != VK_SUCCESS)
2328 return result;
2329
2330 *ppData = (uint8_t *)mem->bo->map + offset;
2331
2332 return VK_SUCCESS;
2333 }
2334
2335 void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2336 {
2337 PVR_FROM_HANDLE(pvr_device, device, _device);
2338 PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2339
2340 if (!mem || !mem->bo->map)
2341 return;
2342
2343 device->ws->ops->buffer_unmap(mem->bo);
2344 }
2345
2346 VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
2347 uint32_t memoryRangeCount,
2348 const VkMappedMemoryRange *pMemoryRanges)
2349 {
2350 return VK_SUCCESS;
2351 }
2352
2353 VkResult
2354 pvr_InvalidateMappedMemoryRanges(VkDevice _device,
2355 uint32_t memoryRangeCount,
2356 const VkMappedMemoryRange *pMemoryRanges)
2357 {
2358 return VK_SUCCESS;
2359 }
2360
2361 void pvr_GetImageSparseMemoryRequirements2(
2362 VkDevice device,
2363 const VkImageSparseMemoryRequirementsInfo2 *pInfo,
2364 uint32_t *pSparseMemoryRequirementCount,
2365 VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2366 {
2367 *pSparseMemoryRequirementCount = 0;
2368 }
2369
2370 void pvr_GetDeviceMemoryCommitment(VkDevice device,
2371 VkDeviceMemory memory,
2372 VkDeviceSize *pCommittedMemoryInBytes)
2373 {
2374 *pCommittedMemoryInBytes = 0;
2375 }
2376
2377 VkResult pvr_bind_memory(struct pvr_device *device,
2378 struct pvr_device_memory *mem,
2379 VkDeviceSize offset,
2380 VkDeviceSize size,
2381 VkDeviceSize alignment,
2382 struct pvr_winsys_vma **const vma_out,
2383 pvr_dev_addr_t *const dev_addr_out)
2384 {
2385 VkDeviceSize virt_size =
2386 size + (offset & (device->heaps.general_heap->page_size - 1));
2387 struct pvr_winsys_vma *vma;
2388 pvr_dev_addr_t dev_addr;
2389 VkResult result;
2390
2391 /* Valid usage:
2392 *
2393 * "memoryOffset must be an integer multiple of the alignment member of
2394 * the VkMemoryRequirements structure returned from a call to
2395 * vkGetBufferMemoryRequirements with buffer"
2396 *
2397 * "memoryOffset must be an integer multiple of the alignment member of
2398 * the VkMemoryRequirements structure returned from a call to
2399 * vkGetImageMemoryRequirements with image"
2400 */
2401 assert(offset % alignment == 0);
2402 assert(offset < mem->bo->size);
2403
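   /* virt_size pads the requested size with the offset's remainder within a
    * page, so the page-granular reservation below still spans the whole
    * bound range.
    */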
2404 result = device->ws->ops->heap_alloc(device->heaps.general_heap,
2405 virt_size,
2406 alignment,
2407 &vma);
2408 if (result != VK_SUCCESS)
2409 goto err_out;
2410
2411 result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
2412 if (result != VK_SUCCESS)
2413 goto err_free_vma;
2414
2415 *dev_addr_out = dev_addr;
2416 *vma_out = vma;
2417
2418 return VK_SUCCESS;
2419
2420 err_free_vma:
2421 device->ws->ops->heap_free(vma);
2422
2423 err_out:
2424 return result;
2425 }
2426
2427 void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
2428 {
2429 device->ws->ops->vma_unmap(vma);
2430 device->ws->ops->heap_free(vma);
2431 }
2432
2433 VkResult pvr_BindBufferMemory2(VkDevice _device,
2434 uint32_t bindInfoCount,
2435 const VkBindBufferMemoryInfo *pBindInfos)
2436 {
2437 PVR_FROM_HANDLE(pvr_device, device, _device);
2438 uint32_t i;
2439
2440 for (i = 0; i < bindInfoCount; i++) {
2441 PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
2442 PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2443
2444 VkResult result = pvr_bind_memory(device,
2445 mem,
2446 pBindInfos[i].memoryOffset,
2447 buffer->vk.size,
2448 buffer->alignment,
2449 &buffer->vma,
2450 &buffer->dev_addr);
2451 if (result != VK_SUCCESS) {
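         /* Unbind everything this call has bound so far before bailing out. */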
2452 while (i--) {
2453 PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2454 pvr_unbind_memory(device, buffer->vma);
2455 }
2456
2457 return result;
2458 }
2459 }
2460
2461 return VK_SUCCESS;
2462 }
2463
2464 VkResult pvr_QueueBindSparse(VkQueue _queue,
2465 uint32_t bindInfoCount,
2466 const VkBindSparseInfo *pBindInfo,
2467 VkFence fence)
2468 {
2469 return VK_SUCCESS;
2470 }
2471
2472 /* Event functions. */
2473
2474 VkResult pvr_CreateEvent(VkDevice _device,
2475 const VkEventCreateInfo *pCreateInfo,
2476 const VkAllocationCallbacks *pAllocator,
2477 VkEvent *pEvent)
2478 {
2479 PVR_FROM_HANDLE(pvr_device, device, _device);
2480
2481 struct pvr_event *event = vk_object_alloc(&device->vk,
2482 pAllocator,
2483 sizeof(*event),
2484 VK_OBJECT_TYPE_EVENT);
2485 if (!event)
2486 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2487
2488 event->sync = NULL;
2489 event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2490
2491 *pEvent = pvr_event_to_handle(event);
2492
2493 return VK_SUCCESS;
2494 }
2495
2496 void pvr_DestroyEvent(VkDevice _device,
2497 VkEvent _event,
2498 const VkAllocationCallbacks *pAllocator)
2499 {
2500 PVR_FROM_HANDLE(pvr_device, device, _device);
2501 PVR_FROM_HANDLE(pvr_event, event, _event);
2502
2503 if (!event)
2504 return;
2505
2506 if (event->sync)
2507 vk_sync_destroy(&device->vk, event->sync);
2508
2509 vk_object_free(&device->vk, pAllocator, event);
2510 }
2511
2512 VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
2513 {
2514 PVR_FROM_HANDLE(pvr_device, device, _device);
2515 PVR_FROM_HANDLE(pvr_event, event, _event);
2516 VkResult result;
2517
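   /* Device-side set/reset is tracked through event->sync; a wait with a zero
    * timeout polls whether that sync has signalled yet.
    */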
2518 switch (event->state) {
2519 case PVR_EVENT_STATE_SET_BY_DEVICE:
2520 if (!event->sync)
2521 return VK_EVENT_RESET;
2522
2523 result =
2524 vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2525 result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
2526 break;
2527
2528 case PVR_EVENT_STATE_RESET_BY_DEVICE:
2529 if (!event->sync)
2530 return VK_EVENT_RESET;
2531
2532 result =
2533 vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2534 result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
2535 break;
2536
2537 case PVR_EVENT_STATE_SET_BY_HOST:
2538 result = VK_EVENT_SET;
2539 break;
2540
2541 case PVR_EVENT_STATE_RESET_BY_HOST:
2542 result = VK_EVENT_RESET;
2543 break;
2544
2545 default:
2546 unreachable("Event object in unknown state");
2547 }
2548
2549 return result;
2550 }
2551
2552 VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
2553 {
2554 PVR_FROM_HANDLE(pvr_event, event, _event);
2555
2556 if (event->sync) {
2557 PVR_FROM_HANDLE(pvr_device, device, _device);
2558
2559 const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
2560 if (result != VK_SUCCESS)
2561 return result;
2562 }
2563
2564 event->state = PVR_EVENT_STATE_SET_BY_HOST;
2565
2566 return VK_SUCCESS;
2567 }
2568
2569 VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
2570 {
2571 PVR_FROM_HANDLE(pvr_event, event, _event);
2572
2573 if (event->sync) {
2574 PVR_FROM_HANDLE(pvr_device, device, _device);
2575
2576 const VkResult result = vk_sync_reset(&device->vk, event->sync);
2577 if (result != VK_SUCCESS)
2578 return result;
2579 }
2580
2581 event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2582
2583 return VK_SUCCESS;
2584 }
2585
2586 /* Buffer functions. */
2587
2588 VkResult pvr_CreateBuffer(VkDevice _device,
2589 const VkBufferCreateInfo *pCreateInfo,
2590 const VkAllocationCallbacks *pAllocator,
2591 VkBuffer *pBuffer)
2592 {
2593 PVR_FROM_HANDLE(pvr_device, device, _device);
2594 const uint32_t alignment = 4096;
2595 struct pvr_buffer *buffer;
2596
2597 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2598 assert(pCreateInfo->usage != 0);
2599
2600 /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
2601 if (pCreateInfo->size >= ULONG_MAX - alignment)
2602 return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2603
2604 buffer =
2605 vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
2606 if (!buffer)
2607 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2608
2609 buffer->alignment = alignment;
2610
2611 *pBuffer = pvr_buffer_to_handle(buffer);
2612
2613 return VK_SUCCESS;
2614 }
2615
2616 void pvr_DestroyBuffer(VkDevice _device,
2617 VkBuffer _buffer,
2618 const VkAllocationCallbacks *pAllocator)
2619 {
2620 PVR_FROM_HANDLE(pvr_device, device, _device);
2621 PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
2622
2623 if (!buffer)
2624 return;
2625
2626 if (buffer->vma)
2627 pvr_unbind_memory(device, buffer->vma);
2628
2629 vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
2630 }
2631
2632 VkResult pvr_gpu_upload(struct pvr_device *device,
2633 struct pvr_winsys_heap *heap,
2634 const void *data,
2635 size_t size,
2636 uint64_t alignment,
2637 struct pvr_suballoc_bo **const pvr_bo_out)
2638 {
2639 struct pvr_suballoc_bo *suballoc_bo = NULL;
2640 struct pvr_suballocator *allocator;
2641 VkResult result;
2642 void *map;
2643
2644 assert(size > 0);
2645
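   /* Route the upload through the suballocator backing the destination heap. */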
2646 if (heap == device->heaps.general_heap)
2647 allocator = &device->suballoc_general;
2648 else if (heap == device->heaps.pds_heap)
2649 allocator = &device->suballoc_pds;
2650 else if (heap == device->heaps.transfer_frag_heap)
2651 allocator = &device->suballoc_transfer;
2652 else if (heap == device->heaps.usc_heap)
2653 allocator = &device->suballoc_usc;
2654 else
2655 unreachable("Unknown heap type");
2656
2657 result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
2658 if (result != VK_SUCCESS)
2659 return result;
2660
2661 map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2662 memcpy(map, data, size);
2663
2664 *pvr_bo_out = suballoc_bo;
2665
2666 return VK_SUCCESS;
2667 }
2668
2669 VkResult pvr_gpu_upload_usc(struct pvr_device *device,
2670 const void *code,
2671 size_t code_size,
2672 uint64_t code_alignment,
2673 struct pvr_suballoc_bo **const pvr_bo_out)
2674 {
2675 struct pvr_suballoc_bo *suballoc_bo = NULL;
2676 VkResult result;
2677 void *map;
2678
2679 assert(code_size > 0);
2680
2681    /* The USC will prefetch the next instruction, so over-allocate by 1
2682 * instruction to prevent reading off the end of a page into a potentially
2683 * unallocated page.
2684 */
2685 result = pvr_bo_suballoc(&device->suballoc_usc,
2686 code_size + ROGUE_MAX_INSTR_BYTES,
2687 code_alignment,
2688 false,
2689 &suballoc_bo);
2690 if (result != VK_SUCCESS)
2691 return result;
2692
2693 map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2694 memcpy(map, code, code_size);
2695
2696 *pvr_bo_out = suballoc_bo;
2697
2698 return VK_SUCCESS;
2699 }
2700
2701 /**
2702 * \brief Upload PDS program data and code segments from host memory to device
2703 * memory.
2704 *
2705 * \param[in] device Logical device pointer.
2706 * \param[in] data Pointer to PDS data segment to upload.
2707 * \param[in] data_size_dwords Size of PDS data segment in dwords.
2708 * \param[in] data_alignment Required alignment of the PDS data segment in
2709 * bytes. Must be a power of two.
2710 * \param[in] code Pointer to PDS code segment to upload.
2711 * \param[in] code_size_dwords Size of PDS code segment in dwords.
2712 * \param[in] code_alignment Required alignment of the PDS code segment in
2713 * bytes. Must be a power of two.
2714 * \param[in] min_alignment Minimum alignment of the bo holding the PDS
2715 * program in bytes.
2716 * \param[out] pds_upload_out On success will be initialized based on the
2717 * uploaded PDS program.
2718 * \return VK_SUCCESS on success, or error code otherwise.
2719 */
2720 VkResult pvr_gpu_upload_pds(struct pvr_device *device,
2721 const uint32_t *data,
2722 uint32_t data_size_dwords,
2723 uint32_t data_alignment,
2724 const uint32_t *code,
2725 uint32_t code_size_dwords,
2726 uint32_t code_alignment,
2727 uint64_t min_alignment,
2728 struct pvr_pds_upload *const pds_upload_out)
2729 {
2730 /* All alignment and sizes below are in bytes. */
2731 const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
2732 const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
2733 const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
2734 const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
2735 const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
2736 const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
2737 const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
2738 : data_aligned_size;
2739 VkResult result;
2740 void *map;
2741
2742 assert(code || data);
2743 assert(!code || (code_size_dwords != 0 && code_alignment != 0));
2744 assert(!data || (data_size_dwords != 0 && data_alignment != 0));
2745
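   /* Single suballocation layout in bytes:
    *
    *    [ data, data_aligned_size ][ pad up to code_offset ][ code, code_aligned_size ]
    *
    * When no code is supplied the bo only covers the data segment.
    */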
2746 result = pvr_bo_suballoc(&device->suballoc_pds,
2747 bo_size,
2748 bo_alignment,
2749 true,
2750 &pds_upload_out->pvr_bo);
2751 if (result != VK_SUCCESS)
2752 return result;
2753
2754 map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);
2755
2756 if (data) {
2757 memcpy(map, data, data_size);
2758
2759 pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
2760 device->heaps.pds_heap->base_addr.addr;
2761
2762 /* Store data size in dwords. */
2763 assert(data_aligned_size % 4 == 0);
2764 pds_upload_out->data_size = data_aligned_size / 4;
2765 } else {
2766 pds_upload_out->data_offset = 0;
2767 pds_upload_out->data_size = 0;
2768 }
2769
2770 if (code) {
2771 memcpy((uint8_t *)map + code_offset, code, code_size);
2772
2773 pds_upload_out->code_offset =
2774 (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
2775 device->heaps.pds_heap->base_addr.addr;
2776
2777 /* Store code size in dwords. */
2778 assert(code_aligned_size % 4 == 0);
2779 pds_upload_out->code_size = code_aligned_size / 4;
2780 } else {
2781 pds_upload_out->code_offset = 0;
2782 pds_upload_out->code_size = 0;
2783 }
2784
2785 return VK_SUCCESS;
2786 }
2787
2788 static VkResult
2789 pvr_framebuffer_create_ppp_state(struct pvr_device *device,
2790 struct pvr_framebuffer *framebuffer)
2791 {
2792 const uint32_t cache_line_size =
2793 rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
2794 uint32_t ppp_state[3];
2795 VkResult result;
2796
2797 pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
2798 header.pres_terminate = true;
2799 }
2800
2801 pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
2802 term0.clip_right =
2803 DIV_ROUND_UP(
2804 framebuffer->width,
2805 PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
2806 1;
2807 term0.clip_bottom =
2808 DIV_ROUND_UP(
2809 framebuffer->height,
2810 PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
2811 1;
2812 }
2813
2814 pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
2815 term1.render_target = 0;
2816 term1.clip_left = 0;
2817 }
2818
2819 result = pvr_gpu_upload(device,
2820 device->heaps.general_heap,
2821 ppp_state,
2822 sizeof(ppp_state),
2823 cache_line_size,
2824 &framebuffer->ppp_state_bo);
2825 if (result != VK_SUCCESS)
2826 return result;
2827
2828 /* Calculate the size of PPP state in dwords. */
2829 framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);
2830
2831 return VK_SUCCESS;
2832 }
2833
2834 static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
2835 uint32_t render_targets_count)
2836 {
2837 uint32_t i;
2838
2839 for (i = 0; i < render_targets_count; i++) {
2840 if (pthread_mutex_init(&render_targets[i].mutex, NULL))
2841 goto err_mutex_destroy;
2842 }
2843
2844 return true;
2845
2846 err_mutex_destroy:
2847 while (i--)
2848 pthread_mutex_destroy(&render_targets[i].mutex);
2849
2850 return false;
2851 }
2852
2853 static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
2854 uint32_t render_targets_count)
2855 {
2856 for (uint32_t i = 0; i < render_targets_count; i++) {
2857 if (render_targets[i].valid) {
2858 pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
2859 render_targets[i].valid = false;
2860 }
2861
2862 pthread_mutex_destroy(&render_targets[i].mutex);
2863 }
2864 }
2865
2866 VkResult pvr_CreateFramebuffer(VkDevice _device,
2867 const VkFramebufferCreateInfo *pCreateInfo,
2868 const VkAllocationCallbacks *pAllocator,
2869 VkFramebuffer *pFramebuffer)
2870 {
2871 PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
2872 PVR_FROM_HANDLE(pvr_device, device, _device);
2873 struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
2874 struct pvr_spm_eot_state *spm_eot_state_per_render;
2875 struct pvr_render_target *render_targets;
2876 struct pvr_framebuffer *framebuffer;
2877 struct pvr_image_view **attachments;
2878 uint32_t render_targets_count;
2879 uint64_t scratch_buffer_size;
2880 VkResult result;
2881
2882 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2883
2884 render_targets_count =
2885 PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);
2886
2887 VK_MULTIALLOC(ma);
2888 vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
2889 vk_multialloc_add(&ma,
2890 &attachments,
2891 __typeof__(*attachments),
2892 pCreateInfo->attachmentCount);
2893 vk_multialloc_add(&ma,
2894 &render_targets,
2895 __typeof__(*render_targets),
2896 render_targets_count);
2897 vk_multialloc_add(&ma,
2898 &spm_eot_state_per_render,
2899 __typeof__(*spm_eot_state_per_render),
2900 pass->hw_setup->render_count);
2901 vk_multialloc_add(&ma,
2902 &spm_bgobj_state_per_render,
2903 __typeof__(*spm_bgobj_state_per_render),
2904 pass->hw_setup->render_count);
2905
2906 if (!vk_multialloc_zalloc2(&ma,
2907 &device->vk.alloc,
2908 pAllocator,
2909 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
2910 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2911
2912 vk_object_base_init(&device->vk,
2913 &framebuffer->base,
2914 VK_OBJECT_TYPE_FRAMEBUFFER);
2915
2916 framebuffer->width = pCreateInfo->width;
2917 framebuffer->height = pCreateInfo->height;
2918 framebuffer->layers = pCreateInfo->layers;
2919
2920 framebuffer->attachments = attachments;
2921 framebuffer->attachment_count = pCreateInfo->attachmentCount;
2922 for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
2923 framebuffer->attachments[i] =
2924 pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
2925 }
2926
2927 result = pvr_framebuffer_create_ppp_state(device, framebuffer);
2928 if (result != VK_SUCCESS)
2929 goto err_free_framebuffer;
2930
2931 framebuffer->render_targets = render_targets;
2932 framebuffer->render_targets_count = render_targets_count;
2933 if (!pvr_render_targets_init(framebuffer->render_targets,
2934 render_targets_count)) {
2935 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2936 goto err_free_ppp_state_bo;
2937 }
2938
2939 scratch_buffer_size =
2940 pvr_spm_scratch_buffer_calc_required_size(pass,
2941 framebuffer->width,
2942 framebuffer->height);
2943
2944 result = pvr_spm_scratch_buffer_get_buffer(device,
2945 scratch_buffer_size,
2946 &framebuffer->scratch_buffer);
2947 if (result != VK_SUCCESS)
2948 goto err_finish_render_targets;
2949
2950 for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
2951 uint32_t emit_count;
2952
2953 result = pvr_spm_init_eot_state(device,
2954 &spm_eot_state_per_render[i],
2955 framebuffer,
2956 &pass->hw_setup->renders[i],
2957 &emit_count);
2958 if (result != VK_SUCCESS)
2959 goto err_finish_eot_state;
2960
2961 result = pvr_spm_init_bgobj_state(device,
2962 &spm_bgobj_state_per_render[i],
2963 framebuffer,
2964 &pass->hw_setup->renders[i],
2965 emit_count);
2966 if (result != VK_SUCCESS)
2967 goto err_finish_bgobj_state;
2968
2969 continue;
2970
2971 err_finish_bgobj_state:
2972 pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
2973
2974 for (uint32_t j = 0; j < i; j++)
2975 pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
2976
2977 err_finish_eot_state:
2978 for (uint32_t j = 0; j < i; j++)
2979 pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
2980
2981 goto err_finish_render_targets;
2982 }
2983
2984 framebuffer->render_count = pass->hw_setup->render_count;
2985 framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
2986 framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
2987
2988 *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
2989
2990 return VK_SUCCESS;
2991
2992 err_finish_render_targets:
2993 pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);
2994
2995 err_free_ppp_state_bo:
2996 pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
2997
2998 err_free_framebuffer:
2999 vk_object_base_finish(&framebuffer->base);
3000 vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3001
3002 return result;
3003 }
3004
3005 void pvr_DestroyFramebuffer(VkDevice _device,
3006 VkFramebuffer _fb,
3007 const VkAllocationCallbacks *pAllocator)
3008 {
3009 PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
3010 PVR_FROM_HANDLE(pvr_device, device, _device);
3011
3012 if (!framebuffer)
3013 return;
3014
3015 for (uint32_t i = 0; i < framebuffer->render_count; i++) {
3016 pvr_spm_finish_bgobj_state(device,
3017 &framebuffer->spm_bgobj_state_per_render[i]);
3018
3019 pvr_spm_finish_eot_state(device,
3020 &framebuffer->spm_eot_state_per_render[i]);
3021 }
3022
3023 pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
3024 pvr_render_targets_fini(framebuffer->render_targets,
3025 framebuffer->render_targets_count);
3026 pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
3027 vk_object_base_finish(&framebuffer->base);
3028 vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3029 }
3030
3031 static uint32_t
3032 pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
3033 VkFilter filter)
3034 {
3035 switch (filter) {
3036 case VK_FILTER_NEAREST:
3037 return PVRX(TEXSTATE_FILTER_POINT);
3038 case VK_FILTER_LINEAR:
3039 return PVRX(TEXSTATE_FILTER_LINEAR);
3040 default:
3041 unreachable("Unknown filter type.");
3042 }
3043 }
3044
3045 static uint32_t
3046 pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
3047 {
3048 switch (addr_mode) {
3049 case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3050 return PVRX(TEXSTATE_ADDRMODE_REPEAT);
3051 case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3052 return PVRX(TEXSTATE_ADDRMODE_FLIP);
3053 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3054 return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
3055 case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3056 return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
3057 case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3058 return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
3059 default:
3060 unreachable("Invalid sampler address mode.");
3061 }
3062 }
3063
3064 VkResult pvr_CreateSampler(VkDevice _device,
3065 const VkSamplerCreateInfo *pCreateInfo,
3066 const VkAllocationCallbacks *pAllocator,
3067 VkSampler *pSampler)
3068 {
3069 PVR_FROM_HANDLE(pvr_device, device, _device);
3070 uint32_t border_color_table_index;
3071 struct pvr_sampler *sampler;
3072 float lod_rounding_bias;
3073 VkFilter min_filter;
3074 VkFilter mag_filter;
3075 VkResult result;
3076 float min_lod;
3077 float max_lod;
3078
3079 STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
3080 sizeof(((union pvr_sampler_descriptor *)NULL)->words));
3081
3082 sampler =
3083 vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
3084 if (!sampler) {
3085 result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3086 goto err_out;
3087 }
3088
3089 mag_filter = pCreateInfo->magFilter;
3090 min_filter = pCreateInfo->minFilter;
3091
3092 result =
3093 pvr_border_color_table_get_or_create_entry(&device->border_color_table,
3094 sampler,
3095 &border_color_table_index);
3096 if (result != VK_SUCCESS)
3097 goto err_free_sampler;
3098
3099 if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
3100    /* The min/mag filters may need adjustment here; the GPU should decide
3101 * which of the two filters to use based on the clamped LOD value: LOD
3102 * <= 0 implies magnification, while LOD > 0 implies minification.
3103 *
3104 * As a workaround, we override magFilter with minFilter if we know that
3105 * the magnification filter will never be used due to clamping anyway
3106 * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
3107 * if maxLod <= 0.
3108 */
3109 if (pCreateInfo->minLod > 0.0f) {
3110 /* The clamped LOD will always be positive => always minify. */
3111 mag_filter = pCreateInfo->minFilter;
3112 }
3113
3114 if (pCreateInfo->maxLod <= 0.0f) {
3115 /* The clamped LOD will always be negative or zero => always
3116 * magnify.
3117 */
3118 min_filter = pCreateInfo->magFilter;
3119 }
3120 }
3121
3122 if (pCreateInfo->compareEnable) {
3123 sampler->descriptor.data.compare_op =
3124 (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
3125 } else {
3126 sampler->descriptor.data.compare_op =
3127 (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
3128 }
3129
3130 sampler->descriptor.data.word3 = 0;
3131 pvr_csb_pack (&sampler->descriptor.data.sampler_word,
3132 TEXSTATE_SAMPLER,
3133 word) {
3134 const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
3135 const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
3136 (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3137 const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
3138 PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
3139 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
3140 const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
3141 PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
3142 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
3143
3144 word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
3145 word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);
3146
3147 if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
3148 word.mipfilter = true;
3149
3150 word.addrmode_u =
3151 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
3152 word.addrmode_v =
3153 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
3154 word.addrmode_w =
3155 pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);
3156
3157 /* TODO: Figure out defines for these. */
3158 if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
3159 sampler->descriptor.data.word3 |= 0x40000000;
3160
3161 if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
3162 sampler->descriptor.data.word3 |= 0x20000000;
3163
3164 /* The Vulkan 1.0.205 spec says:
3165 *
3166 * The absolute value of mipLodBias must be less than or equal to
3167 * VkPhysicalDeviceLimits::maxSamplerLodBias.
3168 */
3169 word.dadjust =
3170 PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
3171 util_signed_fixed(
3172 CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
3173 PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
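      /* dadjust is the hardware's signed fixed-point LOD bias, biased around
       * TEXSTATE_DADJUST_ZERO_UINT; the CLAMP above keeps it inside the
       * representable range.
       */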
3174
3175 /* Anisotropy is not supported for now. */
3176 word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
3177
3178 if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
3179 pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
3180 /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
3181 * selected by adding 0.5 and then truncating the input LOD value.
3182 * This hardware adds the 0.5 bias before clamping against
3183 * lodmin/lodmax, while Vulkan specifies the bias to be added after
3184 * clamping. We compensate for this difference by adding the 0.5
3185 * bias to the LOD bounds, too.
3186 */
3187 lod_rounding_bias = 0.5f;
3188 } else {
3189 lod_rounding_bias = 0.0f;
3190 }
3191
3192 min_lod = pCreateInfo->minLod + lod_rounding_bias;
3193 word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
3194 PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3195
3196 max_lod = pCreateInfo->maxLod + lod_rounding_bias;
3197 word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
3198 PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3199
3200 word.bordercolor_index = border_color_table_index;
3201
3202 if (pCreateInfo->unnormalizedCoordinates)
3203 word.non_normalized_coords = true;
3204 }
3205
3206 *pSampler = pvr_sampler_to_handle(sampler);
3207
3208 return VK_SUCCESS;
3209
3210 err_free_sampler:
3211 vk_object_free(&device->vk, pAllocator, sampler);
3212
3213 err_out:
3214 return result;
3215 }
3216
3217 void pvr_DestroySampler(VkDevice _device,
3218 VkSampler _sampler,
3219 const VkAllocationCallbacks *pAllocator)
3220 {
3221 PVR_FROM_HANDLE(pvr_device, device, _device);
3222 PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);
3223
3224 if (!sampler)
3225 return;
3226
3227 vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
3228 }
3229
3230 void pvr_GetBufferMemoryRequirements2(
3231 VkDevice _device,
3232 const VkBufferMemoryRequirementsInfo2 *pInfo,
3233 VkMemoryRequirements2 *pMemoryRequirements)
3234 {
3235 PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
3236 PVR_FROM_HANDLE(pvr_device, device, _device);
3237 uint64_t size;
3238
3239 /* The Vulkan 1.0.166 spec says:
3240 *
3241 * memoryTypeBits is a bitmask and contains one bit set for every
3242 * supported memory type for the resource. Bit 'i' is set if and only
3243 * if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3244 * structure for the physical device is supported for the resource.
3245 *
3246 * All types are currently supported for buffers.
3247 */
3248 pMemoryRequirements->memoryRequirements.memoryTypeBits =
3249 (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3250
3251 pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
3252
3253 size = buffer->vk.size;
3254
3255 if (size % device->ws->page_size == 0 ||
3256 size % device->ws->page_size >
3257 device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
3258 /* TODO: We can save memory by having one extra virtual page mapped
3259 * in and having the first and last virtual page mapped to the first
3260 * physical address.
3261 */
3262 size += PVR_BUFFER_MEMORY_PADDING_SIZE;
3263 }
3264
3265 pMemoryRequirements->memoryRequirements.size =
3266 ALIGN_POT(size, buffer->alignment);
3267 }
3268
3269 void pvr_GetImageMemoryRequirements2(VkDevice _device,
3270 const VkImageMemoryRequirementsInfo2 *pInfo,
3271 VkMemoryRequirements2 *pMemoryRequirements)
3272 {
3273 PVR_FROM_HANDLE(pvr_device, device, _device);
3274 PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
3275
3276 /* The Vulkan 1.0.166 spec says:
3277 *
3278 * memoryTypeBits is a bitmask and contains one bit set for every
3279 * supported memory type for the resource. Bit 'i' is set if and only
3280 * if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3281 * structure for the physical device is supported for the resource.
3282 *
3283 * All types are currently supported for images.
3284 */
3285 const uint32_t memory_types =
3286 (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3287
3288 /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
3289 * in GetImageMemoryRequirements()), but this should be known at image
3290 * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
3291 * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
3292 * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
3293 * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
3294 *
3295 * Note: Presumably the 4096 alignment requirement comes from the Vulkan
3296 * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
3297 * render and compute jobs.
3298 */
3299 pMemoryRequirements->memoryRequirements.alignment = image->alignment;
3300 pMemoryRequirements->memoryRequirements.size =
3301 align64(image->size, image->alignment);
3302 pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3303 }
3304