/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pco/pco.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "usc/pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices per instance are created,
 * devices will have a smaller global free list size, as usually this use-case
 * implies smaller amounts of work spread out. The free list can still grow as
 * required.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
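/* For illustration: 1/8 would be 12.5%, which truncates to 12 when stored as
 * an integer percentage, hence the value 13 to keep the threshold at or above
 * 1/8.
 */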

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
#   define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

#if PVR_USE_WSI_PLATFORM_DISPLAY
#   define PVR_USE_WSI_PLATFORM true
#else
#   define PVR_USE_WSI_PLATFORM false
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)
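/* i.e. Vulkan 1.0, with the patch component simply tracking the installed
 * Vulkan header version.
 */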

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default size in bytes used by pvr_CreateDevice() for setting up the
 * suballoc_general, suballoc_pds and suballoc_usc suballocators.
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      const char *name;
      size_t len;
   } render, display;
};

#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }
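/* For example, DEF_CONFIG("ti,am62-gpu", "ti,am625-dss") expands to:
 *    { .render  = { .name = "ti,am62-gpu",  .len = 11 },
 *      .display = { .name = "ti,am625-dss", .len = 12 } }
 * i.e. .len is the string length, computed at compile time from the literal.
 */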

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
   DEF_CONFIG("ti,j721s2-gpu", "ti,j721e-dss"),
};

#undef DEF_CONFIG

static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: Enable this extension once the conformance tests are updated
       * to version 1.3.6.0; the current version does not include the
       * Imagination driver ID, which makes a dEQP test fail.
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_index_type_uint8 = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_index_type_uint8 = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* VK_KHR_index_type_uint8 */
      .indexTypeUint8 = true,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
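   /* VK_UUID_SIZE is 16, so only the leading 16 bytes of the 20-byte SHA-1
    * digest are kept for the UUID.
    */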
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   static const struct pvr_descriptor_limits descriptor_limits[] = {
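      /* Columns follow struct pvr_descriptor_limits field order: resources,
       * samplers, uniform buffers, storage buffers, sampled images, storage
       * images, input attachments.
       */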
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

static bool pvr_physical_device_get_properties(
   const struct pvr_physical_device *const pdevice,
   struct vk_properties *const properties)
{
   const struct pvr_device_info *const dev_info = &pdevice->dev_info;
   const struct pvr_device_runtime_info *const dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a compute
    * barrier - each slot has a fixed number of invocations, the whole workgroup
    * may need to span multiple slots. As each slot will WAIT at the barrier
    * until the last invocation completes, all have to be schedulable at the
    * same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
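   /* Worked example with the default minimums above: 14 slots * 32 instances
    * per PDS task = 448 schedulable invocations, which is below 512, so such
    * cores advertise 384.
    */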

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = pdevice->ws->page_size,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->pco_ctx)
      ralloc_free(pdevice->pco_ctx);

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total ram from the system */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much ram with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
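   /* e.g. 4 GiB of total RAM gives a 2 GiB heap; 8 GiB gives a 6 GiB heap. */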
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      result = vk_errorf(instance,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "WARNING: powervr is not a conformant Vulkan "
                         "implementation. Pass "
                         "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                         "what you're doing.");
      goto err_pvr_winsys_destroy;
   }

   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(pdevice, &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Setup available memory heaps and types */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   pdevice->pco_ctx = pco_ctx_create(&pdevice->dev_info, NULL);
   if (!pdevice->pco_ctx) {
      ralloc_free(pdevice->compiler);
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize PCO compiler context");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
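   /* This is called twice by pvr_physical_device_enumerate(): once with
    * devices == NULL to query the count, then again with an allocated array,
    * mirroring drmGetDevices2() semantics.
    */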
   int ret = drmGetDevices2(0, devices, max_devices);
   if (ret < 0) {
      return vk_errorf(obj,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to enumerate drm devices (errno %d: %s)",
                       -ret,
                       strerror(-ret));
   }

   if (num_devices_out)
      *num_devices_out = ret;

   return VK_SUCCESS;
}

static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compatible = drm_dev->deviceinfo.platform->compatible;

   for (char **compat = compatible; *compat; compat++) {
      if (strncmp(*compat, info->name, info->len) == 0)
         return true;
   }

   return false;
}

static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
      if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
         return &pvr_drm_configs[i];
   }

   return NULL;
}

static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   drmVersionPtr version_display, version_render;
   struct pvr_device_dump_info info;

   version_display = drmGetVersion(pdevice->ws->display_fd);
   if (!version_display)
      return;

   version_render = drmGetVersion(pdevice->ws->render_fd);
   if (!version_render) {
      drmFreeVersion(version_display);
      return;
   }

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;
   info.drm_display.patchlevel = version_display->version_patchlevel;
   info.drm_display.major = version_display->version_major;
   info.drm_display.minor = version_display->version_minor;
   info.drm_display.name = version_display->name;
   info.drm_display.date = version_display->date;
   info.drm_display.comp = comp_display;
   info.drm_render.patchlevel = version_render->version_patchlevel;
   info.drm_render.major = version_render->version_major;
   info.drm_render.minor = version_render->version_minor;
   info.drm_render.name = version_render->name;
   info.drm_render.date = version_render->date;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(version_display);
   drmFreeVersion(version_render);
}

static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return result;
   }

   pvr_process_debug_variable();

   instance->active_device_count = 0;

   instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
   instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t min_cluster_per_phantom;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);
   min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);

   if (min_cluster_per_phantom >= 4)
      return 1;
   else if (min_cluster_per_phantom == 2)
      return 2;
   else
      return 4;
}

uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
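   /* Two usage modes: when fs_common_size is UINT32_MAX, this returns the
    * largest per-instance common (shared) register allocation that still
    * allows min_tiles_in_flight tiles in flight. Otherwise it returns the
    * number of tiles that can be in flight with the given allocation size,
    * capped at the hardware maximum.
    */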
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);

      return max_common_size;
   }

   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if MESA_DEBUG
   /* Validate the above result. */

   assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
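   /* e.g. with 1 GiB of system memory currently available, the reported
    * budget exceeds current usage by at most ~921 MiB.
    */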
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         vk_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;

         pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         pMemoryBudget->heapUsage[0] = pdevice->heap_used;

         for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            pMemoryBudget->heapBudget[i] = 0u;
            pMemoryBudget->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         vk_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   const struct vk_instance *vk_instance = NULL;

   if (_instance != NULL) {
      PVR_FROM_HANDLE(pvr_instance, instance, _instance);
      vk_instance = &instance->vk;
   }

   return vk_instance_get_proc_addr(vk_instance,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);
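   /* The PDS program is generated in three passes: PDS_GENERATE_SIZES above
    * sizes the segments, then the data and code segments are emitted into the
    * staging buffer below.
    */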

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_compute_shader(program,
                                        data_buffer,
                                        PDS_GENERATE_DATA_SEGMENT,
                                        dev_info);

   pvr_pds_compute_shader(program,
                          code_buffer,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program->data_size,
                               ROGUE_CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT,
                               code_buffer,
                               program->code_size / sizeof(uint32_t),
                               ROGUE_CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT,
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);
1475 
1476       staging_buffer = vk_realloc(&device->vk.alloc,
1477                                   staging_buffer,
1478                                   staging_buffer_size,
1479                                   8,
1480                                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1481       if (!staging_buffer) {
1482          pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);
1483 
1484          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1485       }
1486 
1487       /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
1488       pvr_pds_vertex_shader_sa(&program,
1489                                staging_buffer,
1490                                PDS_GENERATE_DATA_SEGMENT,
1491                                dev_info);
1492       pvr_pds_vertex_shader_sa(&program,
1493                                &staging_buffer[program.data_size],
1494                                PDS_GENERATE_CODE_SEGMENT,
1495                                dev_info);
1496    } else {
1497       *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
1498          .pvr_bo = NULL,
1499       };
1500    }
1501 
1502    /* FIXME: Figure out the define for alignment of 16. */
1503    result = pvr_gpu_upload_pds(device,
1504                                &staging_buffer[0],
1505                                program.data_size,
1506                                16,
1507                                &staging_buffer[program.data_size],
1508                                program.code_size,
1509                                16,
1510                                16,
1511                                upload_out);
1512    if (result != VK_SUCCESS) {
1513       vk_free(&device->vk.alloc, staging_buffer);
1514       pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);
1515 
1516       return result;
1517    }
1518 
1519    vk_free(&device->vk.alloc, staging_buffer);
1520 
1521    return VK_SUCCESS;
1522 }
1523 
1524 static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
1525 {
1526    uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
1527    uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
1528    struct util_dynarray usc_program;
1529    struct pvr_texture_state_info tex_info;
1530    uint32_t *dword_ptr;
1531    uint32_t usc_shareds;
1532    uint32_t usc_temps;
1533    VkResult result;
1534 
1535    util_dynarray_init(&usc_program, NULL);
1536    pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
1537                                     &usc_program,
1538                                     &usc_shareds,
1539                                     &usc_temps);
1540 
1541    device->idfwdf_state.usc_shareds = usc_shareds;
1542 
1543    /* FIXME: Figure out the define for alignment of 16. */
1544    result = pvr_gpu_upload_usc(device,
1545                                usc_program.data,
1546                                usc_program.size,
1547                                16,
1548                                &device->idfwdf_state.usc);
1549    util_dynarray_fini(&usc_program);
1550 
1551    if (result != VK_SUCCESS)
1552       return result;
1553 
1554    /* TODO: Get the store buffer size from the compiler? */
1555    /* TODO: How was the size derived here? */
1556    result = pvr_bo_alloc(device,
1557                          device->heaps.general_heap,
1558                          4 * sizeof(float) * 4 * 2,
1559                          4,
1560                          0,
1561                          &device->idfwdf_state.store_bo);
1562    if (result != VK_SUCCESS)
1563       goto err_free_usc_program;
1564 
1565    result = pvr_bo_alloc(device,
1566                          device->heaps.general_heap,
1567                          usc_shareds * ROGUE_REG_SIZE_BYTES,
1568                          ROGUE_REG_SIZE_BYTES,
1569                          PVR_BO_ALLOC_FLAG_CPU_MAPPED,
1570                          &device->idfwdf_state.shareds_bo);
1571    if (result != VK_SUCCESS)
1572       goto err_free_store_buffer;
1573 
1574    /* Pack state words. */
1575 
1576    pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
1577       sampler.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
1578       sampler.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
1579       sampler.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
1580       sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
1581    }
1582 
1583    /* clang-format off */
1584    pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
1585    /* clang-format on */
1586 
1587    STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);
1588 
1589    tex_info = (struct pvr_texture_state_info){
1590       .format = VK_FORMAT_R32G32B32A32_SFLOAT,
1591       .mem_layout = PVR_MEMLAYOUT_LINEAR,
1592       .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
1593       .type = VK_IMAGE_VIEW_TYPE_2D,
1594       .extent = { .width = 4, .height = 2, .depth = 0 },
1595       .mip_levels = 1,
1596       .sample_count = 1,
1597       .stride = 4,
1598       .swizzle = { PIPE_SWIZZLE_X,
1599                    PIPE_SWIZZLE_Y,
1600                    PIPE_SWIZZLE_Z,
1601                    PIPE_SWIZZLE_W },
1602       .addr = device->idfwdf_state.store_bo->vma->dev_addr,
1603    };
1604 
1605    result = pvr_pack_tex_state(device, &tex_info, image_state);
1606    if (result != VK_SUCCESS)
1607       goto err_free_shareds_buffer;
1608 
1609    /* Fill the shareds buffer. */
1610 
1611    dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;
1612 
1613 #define HIGH_32(val) ((uint32_t)((val) >> 32U))
1614 #define LOW_32(val) ((uint32_t)(val))
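
   /* For example, a hypothetical 64-bit address 0x0000001234567890 splits as
    * HIGH_32() -> 0x00000012 and LOW_32() -> 0x34567890; the per-entry dword
    * order used below is specific to each state word.
    */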
1615 
1616    /* TODO: Should we use compiler info to set up the shareds data instead of
1617     * assuming there are always 12 and that this is how they should be set up?
1618     */
1619 
1620    dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1621    dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1622 
1623    /* Pad the shareds as the texture/sampler state words are 128-bit aligned. */
1624    dword_ptr[2] = 0U;
1625    dword_ptr[3] = 0U;
1626 
1627    dword_ptr[4] = LOW_32(image_state[0]);
1628    dword_ptr[5] = HIGH_32(image_state[0]);
1629    dword_ptr[6] = LOW_32(image_state[1]);
1630    dword_ptr[7] = HIGH_32(image_state[1]);
1631 
1632    dword_ptr[8] = LOW_32(sampler_state[0]);
1633    dword_ptr[9] = HIGH_32(sampler_state[0]);
1634    dword_ptr[10] = LOW_32(sampler_state[1]);
1635    dword_ptr[11] = HIGH_32(sampler_state[1]);
1636    assert(11 + 1 == usc_shareds);
1637 
1638 #undef HIGH_32
1639 #undef LOW_32
1640 
1641    pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
1642    dword_ptr = NULL;
1643 
1644    /* Generate and upload PDS programs. */
1645    result = pvr_pds_idfwdf_programs_create_and_upload(
1646       device,
1647       device->idfwdf_state.usc->dev_addr,
1648       usc_shareds,
1649       usc_temps,
1650       device->idfwdf_state.shareds_bo->vma->dev_addr,
1651       &device->idfwdf_state.pds,
1652       &device->idfwdf_state.sw_compute_barrier_pds);
1653    if (result != VK_SUCCESS)
1654       goto err_free_shareds_buffer;
1655 
1656    return VK_SUCCESS;
1657 
1658 err_free_shareds_buffer:
1659    pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1660 
1661 err_free_store_buffer:
1662    pvr_bo_free(device, device->idfwdf_state.store_bo);
1663 
1664 err_free_usc_program:
1665    pvr_bo_suballoc_free(device->idfwdf_state.usc);
1666 
1667    return result;
1668 }
1669 
1670 static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
1671 {
1672    pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
1673    pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
1674    pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1675    pvr_bo_free(device, device->idfwdf_state.store_bo);
1676    pvr_bo_suballoc_free(device->idfwdf_state.usc);
1677 }
1678 
1679 /* FIXME: We should be calculating the size when we upload the code in
1680  * pvr_srv_setup_static_pixel_event_program().
1681  */
1682 static void pvr_device_get_pixel_event_pds_program_data_size(
1683    const struct pvr_device_info *dev_info,
1684    uint32_t *const data_size_in_dwords_out)
1685 {
1686    struct pvr_pds_event_program program = {
1687       /* No data to DMA, just a DOUTU needed. */
1688       .num_emit_word_pairs = 0,
1689    };
1690 
1691    pvr_pds_set_sizes_pixel_event(&program, dev_info);
1692 
1693    *data_size_in_dwords_out = program.data_size;
1694 }
1695 
1696 static VkResult pvr_device_init_nop_program(struct pvr_device *device)
1697 {
1698    const uint32_t cache_line_size =
1699       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1700    struct pvr_pds_kickusc_program program = { 0 };
1701    struct util_dynarray nop_usc_bin;
1702    uint32_t staging_buffer_size;
1703    uint32_t *staging_buffer;
1704    VkResult result;
1705 
1706    pvr_uscgen_nop(&nop_usc_bin);
1707 
1708    result = pvr_gpu_upload_usc(device,
1709                                util_dynarray_begin(&nop_usc_bin),
1710                                nop_usc_bin.size,
1711                                cache_line_size,
1712                                &device->nop_program.usc);
1713    util_dynarray_fini(&nop_usc_bin);
1714    if (result != VK_SUCCESS)
1715       return result;
1716 
1717    /* Set up a PDS program that kicks the static USC program. */
1718    pvr_pds_setup_doutu(&program.usc_task_control,
1719                        device->nop_program.usc->dev_addr.addr,
1720                        0U,
1721                        ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
1722                        false);
1723 
1724    pvr_pds_set_sizes_pixel_shader(&program);
1725 
1726    staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
1727 
1728    staging_buffer = vk_alloc(&device->vk.alloc,
1729                              staging_buffer_size,
1730                              8U,
1731                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1732    if (!staging_buffer) {
1733       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1734       goto err_free_nop_usc_bo;
1735    }
1736 
1737    pvr_pds_generate_pixel_shader_program(&program, staging_buffer);
1738 
1739    /* FIXME: Figure out the define for alignment of 16. */
1740    result = pvr_gpu_upload_pds(device,
1741                                staging_buffer,
1742                                program.data_size,
1743                                16U,
1744                                &staging_buffer[program.data_size],
1745                                program.code_size,
1746                                16U,
1747                                16U,
1748                                &device->nop_program.pds);
1749    if (result != VK_SUCCESS)
1750       goto err_free_staging_buffer;
1751 
1752    vk_free(&device->vk.alloc, staging_buffer);
1753 
1754    return VK_SUCCESS;
1755 
1756 err_free_staging_buffer:
1757    vk_free(&device->vk.alloc, staging_buffer);
1758 
1759 err_free_nop_usc_bo:
1760    pvr_bo_suballoc_free(device->nop_program.usc);
1761 
1762    return result;
1763 }
1764 
1765 static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
1766 {
1767    simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);
1768 
1769    for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
1770       device->tile_buffer_state.buffers[i] = NULL;
1771 
1772    device->tile_buffer_state.buffer_count = 0;
1773 }
1774 
1775 static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
1776 {
1777    /* Destroy the mutex first so that asserts fire if it is still locked.
1778     * This keeps us from reaching an inconsistent state by freeing buffers
1779     * that might still be in use, or by freeing buffers while new ones are
1780     * being allocated.
1781     */
1782    simple_mtx_destroy(&device->tile_buffer_state.mtx);
1783 
1784    for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
1785       pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
1786 }
1787 
1788 /**
1789  * \brief Ensures that a certain number of tile buffers are allocated.
1790  *
1791  * Makes sure that \p capacity tile buffers are allocated. If fewer are
1792  * present, appends new tile buffers of \p size_in_bytes each to reach the quota.
1793  */
1794 VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
1795                                            uint32_t capacity,
1796                                            uint32_t size_in_bytes)
1797 {
1798    struct pvr_device_tile_buffer_state *tile_buffer_state =
1799       &device->tile_buffer_state;
1800    const uint32_t cache_line_size =
1801       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1802    VkResult result;
1803 
1804    simple_mtx_lock(&tile_buffer_state->mtx);
1805 
1806    /* Clamping in release and asserting in debug. */
1807    assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
1808    capacity = CLAMP(capacity,
1809                     tile_buffer_state->buffer_count,
1810                     ARRAY_SIZE(tile_buffer_state->buffers));
1811 
1812    /* TODO: Implement bo multialloc? To reduce the amount of syscalls and
1813     * allocations.
1814     */
1815    for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
1816       result = pvr_bo_alloc(device,
1817                             device->heaps.general_heap,
1818                             size_in_bytes,
1819                             cache_line_size,
1820                             0,
1821                             &tile_buffer_state->buffers[i]);
1822       if (result != VK_SUCCESS) {
1823          for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
1824             pvr_bo_free(device, tile_buffer_state->buffers[j]);
1825 
1826          goto err_release_lock;
1827       }
1828    }
1829 
1830    tile_buffer_state->buffer_count = capacity;
1831 
1832    simple_mtx_unlock(&tile_buffer_state->mtx);
1833 
1834    return VK_SUCCESS;
1835 
1836 err_release_lock:
1837    simple_mtx_unlock(&tile_buffer_state->mtx);
1838 
1839    return result;
1840 }
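
/* A hedged usage sketch with hypothetical values: guaranteeing four 128 KiB
 * tile buffers before a render that needs them.
 *
 * \code
 *    VkResult result =
 *       pvr_device_tile_buffer_ensure_cap(device, 4, 128 * 1024);
 *    if (result != VK_SUCCESS)
 *       return result;
 * \endcode
 *
 * Existing buffers are left untouched; only the missing ones are appended
 * under the state mutex.
 */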
1841 
1842 static void pvr_device_init_default_sampler_state(struct pvr_device *device)
1843 {
1844    pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
1845       sampler.addrmode_u = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
1846       sampler.addrmode_v = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
1847       sampler.addrmode_w = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
1848       sampler.dadjust = ROGUE_TEXSTATE_DADJUST_ZERO_UINT;
1849       sampler.magfilter = ROGUE_TEXSTATE_FILTER_POINT;
1850       sampler.minfilter = ROGUE_TEXSTATE_FILTER_POINT;
1851       sampler.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;
1852       sampler.non_normalized_coords = true;
1853    }
1854 }
1855 
1856 VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
1857                           const VkDeviceCreateInfo *pCreateInfo,
1858                           const VkAllocationCallbacks *pAllocator,
1859                           VkDevice *pDevice)
1860 {
1861    PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
1862    uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
1863    struct pvr_instance *instance = pdevice->instance;
1864    struct vk_device_dispatch_table dispatch_table;
1865    struct pvr_device *device;
1866    struct pvr_winsys *ws;
1867    VkResult result;
1868 
1869    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1870 
1871    result = pvr_winsys_create(pdevice->render_path,
1872                               pdevice->display_path,
1873                               pAllocator ? pAllocator : &instance->vk.alloc,
1874                               &ws);
1875    if (result != VK_SUCCESS)
1876       goto err_out;
1877 
1878    device = vk_alloc2(&instance->vk.alloc,
1879                       pAllocator,
1880                       sizeof(*device),
1881                       8,
1882                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1883    if (!device) {
1884       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1885       goto err_pvr_winsys_destroy;
1886    }
1887 
1888    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1889                                              &pvr_device_entrypoints,
1890                                              true);
1891 
1892    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1893                                              &wsi_device_entrypoints,
1894                                              false);
1895 
1896    result = vk_device_init(&device->vk,
1897                            &pdevice->vk,
1898                            &dispatch_table,
1899                            pCreateInfo,
1900                            pAllocator);
1901    if (result != VK_SUCCESS)
1902       goto err_free_device;
1903 
1904    device->instance = instance;
1905    device->pdevice = pdevice;
1906    device->ws = ws;
1907 
1908    vk_device_set_drm_fd(&device->vk, ws->render_fd);
1909 
1910    if (ws->features.supports_threaded_submit) {
1911       /* Queue submission can be blocked if the kernel CCBs become full,
1912        * so enable threaded submit to not block the submitter.
1913        */
1914       vk_device_enable_threaded_submit(&device->vk);
1915    }
1916 
1917    ws->ops->get_heaps_info(ws, &device->heaps);
1918 
1919    result = pvr_bo_store_create(device);
1920    if (result != VK_SUCCESS)
1921       goto err_vk_device_finish;
1922 
1923    pvr_bo_suballocator_init(&device->suballoc_general,
1924                             device->heaps.general_heap,
1925                             device,
1926                             PVR_SUBALLOCATOR_GENERAL_SIZE);
1927    pvr_bo_suballocator_init(&device->suballoc_pds,
1928                             device->heaps.pds_heap,
1929                             device,
1930                             PVR_SUBALLOCATOR_PDS_SIZE);
1931    pvr_bo_suballocator_init(&device->suballoc_transfer,
1932                             device->heaps.transfer_frag_heap,
1933                             device,
1934                             PVR_SUBALLOCATOR_TRANSFER_SIZE);
1935    pvr_bo_suballocator_init(&device->suballoc_usc,
1936                             device->heaps.usc_heap,
1937                             device,
1938                             PVR_SUBALLOCATOR_USC_SIZE);
1939    pvr_bo_suballocator_init(&device->suballoc_vis_test,
1940                             device->heaps.vis_test_heap,
1941                             device,
1942                             PVR_SUBALLOCATOR_VIS_TEST_SIZE);
1943 
1944    if (p_atomic_inc_return(&instance->active_device_count) >
1945        PVR_SECONDARY_DEVICE_THRESHOLD) {
1946       initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
1947    }
1948 
1949    result = pvr_free_list_create(device,
1950                                  initial_free_list_size,
1951                                  PVR_GLOBAL_FREE_LIST_MAX_SIZE,
1952                                  PVR_GLOBAL_FREE_LIST_GROW_SIZE,
1953                                  PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
1954                                  NULL /* parent_free_list */,
1955                                  &device->global_free_list);
1956    if (result != VK_SUCCESS)
1957       goto err_dec_device_count;
1958 
1959    result = pvr_device_init_nop_program(device);
1960    if (result != VK_SUCCESS)
1961       goto err_pvr_free_list_destroy;
1962 
1963    result = pvr_device_init_compute_fence_program(device);
1964    if (result != VK_SUCCESS)
1965       goto err_pvr_free_nop_program;
1966 
1967    result = pvr_device_init_compute_empty_program(device);
1968    if (result != VK_SUCCESS)
1969       goto err_pvr_free_compute_fence;
1970 
1971    result = pvr_device_create_compute_query_programs(device);
1972    if (result != VK_SUCCESS)
1973       goto err_pvr_free_compute_empty;
1974 
1975    result = pvr_device_init_compute_idfwdf_state(device);
1976    if (result != VK_SUCCESS)
1977       goto err_pvr_destroy_compute_query_programs;
1978 
1979    result = pvr_device_init_graphics_static_clear_state(device);
1980    if (result != VK_SUCCESS)
1981       goto err_pvr_finish_compute_idfwdf;
1982 
1983    result = pvr_device_init_spm_load_state(device);
1984    if (result != VK_SUCCESS)
1985       goto err_pvr_finish_graphics_static_clear_state;
1986 
1987    pvr_device_init_tile_buffer_state(device);
1988 
1989    result = pvr_queues_create(device, pCreateInfo);
1990    if (result != VK_SUCCESS)
1991       goto err_pvr_finish_tile_buffer_state;
1992 
1993    pvr_device_init_default_sampler_state(device);
1994 
1995    pvr_spm_init_scratch_buffer_store(device);
1996 
1997    result = pvr_init_robustness_buffer(device);
1998    if (result != VK_SUCCESS)
1999       goto err_pvr_spm_finish_scratch_buffer_store;
2000 
2001    result = pvr_border_color_table_init(&device->border_color_table, device);
2002    if (result != VK_SUCCESS)
2003       goto err_pvr_robustness_buffer_finish;
2004 
2005    /* FIXME: Move this to a later stage and possibly somewhere other than
2006     * pvr_device. The purpose of this is so that we don't have to get the size
2007     * on each kick.
2008     */
2009    pvr_device_get_pixel_event_pds_program_data_size(
2010       &pdevice->dev_info,
2011       &device->pixel_event_data_size_in_dwords);
2012 
2013    device->global_cmd_buffer_submit_count = 0;
2014    device->global_queue_present_count = 0;
2015 
2016    *pDevice = pvr_device_to_handle(device);
2017 
2018    return VK_SUCCESS;
2019 
2020 err_pvr_robustness_buffer_finish:
2021    pvr_robustness_buffer_finish(device);
2022 
2023 err_pvr_spm_finish_scratch_buffer_store:
2024    pvr_spm_finish_scratch_buffer_store(device);
2025 
2026    pvr_queues_destroy(device);
2027 
2028 err_pvr_finish_tile_buffer_state:
2029    pvr_device_finish_tile_buffer_state(device);
2030    pvr_device_finish_spm_load_state(device);
2031 
2032 err_pvr_finish_graphics_static_clear_state:
2033    pvr_device_finish_graphics_static_clear_state(device);
2034 
2035 err_pvr_finish_compute_idfwdf:
2036    pvr_device_finish_compute_idfwdf_state(device);
2037 
2038 err_pvr_destroy_compute_query_programs:
2039    pvr_device_destroy_compute_query_programs(device);
2040 
2041 err_pvr_free_compute_empty:
2042    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2043 
2044 err_pvr_free_compute_fence:
2045    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2046 
2047 err_pvr_free_nop_program:
2048    pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2049    pvr_bo_suballoc_free(device->nop_program.usc);
2050 
2051 err_pvr_free_list_destroy:
2052    pvr_free_list_destroy(device->global_free_list);
2053 
2054 err_dec_device_count:
2055    p_atomic_dec(&device->instance->active_device_count);
2056 
2057    pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2058    pvr_bo_suballocator_fini(&device->suballoc_usc);
2059    pvr_bo_suballocator_fini(&device->suballoc_transfer);
2060    pvr_bo_suballocator_fini(&device->suballoc_pds);
2061    pvr_bo_suballocator_fini(&device->suballoc_general);
2062 
2063    pvr_bo_store_destroy(device);
2064 
2065 err_vk_device_finish:
2066    vk_device_finish(&device->vk);
2067 
2068 err_free_device:
2069    vk_free(&device->vk.alloc, device);
2070 
2071 err_pvr_winsys_destroy:
2072    pvr_winsys_destroy(ws);
2073 
2074 err_out:
2075    return result;
2076 }
2077 
2078 void pvr_DestroyDevice(VkDevice _device,
2079                        const VkAllocationCallbacks *pAllocator)
2080 {
2081    PVR_FROM_HANDLE(pvr_device, device, _device);
2082 
2083    if (!device)
2084       return;
2085 
2086    pvr_border_color_table_finish(&device->border_color_table, device);
2087    pvr_robustness_buffer_finish(device);
2088    pvr_spm_finish_scratch_buffer_store(device);
2089    pvr_queues_destroy(device);
2090    pvr_device_finish_tile_buffer_state(device);
2091    pvr_device_finish_spm_load_state(device);
2092    pvr_device_finish_graphics_static_clear_state(device);
2093    pvr_device_finish_compute_idfwdf_state(device);
2094    pvr_device_destroy_compute_query_programs(device);
2095    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2096    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2097    pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2098    pvr_bo_suballoc_free(device->nop_program.usc);
2099    pvr_free_list_destroy(device->global_free_list);
2100    pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2101    pvr_bo_suballocator_fini(&device->suballoc_usc);
2102    pvr_bo_suballocator_fini(&device->suballoc_transfer);
2103    pvr_bo_suballocator_fini(&device->suballoc_pds);
2104    pvr_bo_suballocator_fini(&device->suballoc_general);
2105    pvr_bo_store_destroy(device);
2106    pvr_winsys_destroy(device->ws);
2107    p_atomic_dec(&device->instance->active_device_count);
2108    vk_device_finish(&device->vk);
2109    vk_free(&device->vk.alloc, device);
2110 }
2111 
2112 VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2113                                               VkLayerProperties *pProperties)
2114 {
2115    if (!pProperties) {
2116       *pPropertyCount = 0;
2117       return VK_SUCCESS;
2118    }
2119 
2120    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2121 }
2122 
2123 static void free_memory(struct pvr_device *device,
2124                         struct pvr_device_memory *mem,
2125                         const VkAllocationCallbacks *pAllocator)
2126 {
2127    if (!mem)
2128       return;
2129 
2130    /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
2131     *   If a memory object is mapped at the time it is freed, it is implicitly
2132     *   unmapped.
2133     */
2134    if (mem->bo->map)
2135       device->ws->ops->buffer_unmap(mem->bo);
2136 
2137    p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);
2138 
2139    device->ws->ops->buffer_destroy(mem->bo);
2140 
2141    vk_object_free(&device->vk, pAllocator, mem);
2142 }
2143 
2144 VkResult pvr_AllocateMemory(VkDevice _device,
2145                             const VkMemoryAllocateInfo *pAllocateInfo,
2146                             const VkAllocationCallbacks *pAllocator,
2147                             VkDeviceMemory *pMem)
2148 {
2149    const VkImportMemoryFdInfoKHR *fd_info = NULL;
2150    PVR_FROM_HANDLE(pvr_device, device, _device);
2151    enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
2152    struct pvr_device_memory *mem;
2153    uint64_t heap_used;
2154    VkResult result;
2155 
2156    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2157    assert(pAllocateInfo->allocationSize > 0);
2158 
2159    mem = vk_object_alloc(&device->vk,
2160                          pAllocator,
2161                          sizeof(*mem),
2162                          VK_OBJECT_TYPE_DEVICE_MEMORY);
2163    if (!mem)
2164       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2165 
2166    vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
2167       switch ((unsigned)ext->sType) {
2168       case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
2169          if (device->ws->display_fd >= 0)
2170             type = PVR_WINSYS_BO_TYPE_DISPLAY;
2171          break;
2172       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
2173          fd_info = (void *)ext;
2174          break;
2175       case VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO:
2176          break;
2177       default:
2178          vk_debug_ignored_stype(ext->sType);
2179          break;
2180       }
2181    }
2182 
2183    if (fd_info && fd_info->handleType) {
2184       VkDeviceSize aligned_alloc_size =
2185          ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
2186 
2187       assert(
2188          fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2189          fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2190 
2191       result = device->ws->ops->buffer_create_from_fd(device->ws,
2192                                                       fd_info->fd,
2193                                                       &mem->bo);
2194       if (result != VK_SUCCESS)
2195          goto err_vk_object_free_mem;
2196 
2197       /* For security purposes, we reject importing the bo if it's smaller
2198        * than the requested allocation size. This prevents a malicious client
2199        * from passing a buffer to a trusted client, lying about the size, and
2200        * telling the trusted client to try and texture from an image that goes
2201        * out-of-bounds. This sort of thing could lead to GPU hangs or worse
2202        * in the trusted client. The trusted client can protect itself against
2203        * this sort of attack but only if it can trust the buffer size.
2204        */
2205       if (aligned_alloc_size > mem->bo->size) {
2206          result = vk_errorf(device,
2207                             VK_ERROR_INVALID_EXTERNAL_HANDLE,
2208                             "Aligned requested size too large for the given fd "
2209                             "%" PRIu64 "B > %" PRIu64 "B",
2210                             pAllocateInfo->allocationSize,
2211                             mem->bo->size);
2212          device->ws->ops->buffer_destroy(mem->bo);
2213          goto err_vk_object_free_mem;
2214       }
2215 
2216       /* From the Vulkan spec:
2217        *
2218        *    "Importing memory from a file descriptor transfers ownership of
2219        *    the file descriptor from the application to the Vulkan
2220        *    implementation. The application must not perform any operations on
2221        *    the file descriptor after a successful import."
2222        *
2223        * If the import fails, we leave the file descriptor open.
2224        */
2225       close(fd_info->fd);
2226    } else {
2227       /* Align physical allocations to the page size of the heap that will be
2228        * used when binding device memory (see pvr_bind_memory()) to ensure the
2229        * entire allocation can be mapped.
2230        */
2231       const uint64_t alignment = device->heaps.general_heap->page_size;
2232 
2233       /* FIXME: Need to determine the flags based on
2234        * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
2235        *
2236        * The alternative would be to store the flags alongside the memory
2237        * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
2238        * that they can be looked up.
2239        */
2240       result = device->ws->ops->buffer_create(device->ws,
2241                                               pAllocateInfo->allocationSize,
2242                                               alignment,
2243                                               type,
2244                                               PVR_WINSYS_BO_FLAG_CPU_ACCESS,
2245                                               &mem->bo);
2246       if (result != VK_SUCCESS)
2247          goto err_vk_object_free_mem;
2248    }
2249 
2250    heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
2251    if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
2252       free_memory(device, mem, pAllocator);
2253       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2254    }
2255 
2256    *pMem = pvr_device_memory_to_handle(mem);
2257 
2258    return VK_SUCCESS;
2259 
2260 err_vk_object_free_mem:
2261    vk_object_free(&device->vk, pAllocator, mem);
2262 
2263    return result;
2264 }
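
/* Worked example of the import size check above, with illustrative numbers:
 * given a 4096-byte page size, an allocationSize of 4097 aligns up to 8192,
 * so an imported bo smaller than 8192 bytes is rejected with
 * VK_ERROR_INVALID_EXTERNAL_HANDLE instead of risking out-of-bounds GPU
 * accesses later on.
 */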
2265 
2266 VkResult pvr_GetMemoryFdKHR(VkDevice _device,
2267                             const VkMemoryGetFdInfoKHR *pGetFdInfo,
2268                             int *pFd)
2269 {
2270    PVR_FROM_HANDLE(pvr_device, device, _device);
2271    PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);
2272 
2273    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2274 
2275    assert(
2276       pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2277       pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2278 
2279    return device->ws->ops->buffer_get_fd(mem->bo, pFd);
2280 }
2281 
2282 VkResult
2283 pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
2284                              VkExternalMemoryHandleTypeFlagBits handleType,
2285                              int fd,
2286                              VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2287 {
2288    PVR_FROM_HANDLE(pvr_device, device, _device);
2289 
2290    switch (handleType) {
2291    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
2292       /* FIXME: This should only allow memory types having
2293        * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
2294        * dma-buf should be imported using cacheable memory types,
2295        * given exporter's mmap will always map it as cacheable.
2296        * Ref:
2297        * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
2298        */
2299       pMemoryFdProperties->memoryTypeBits =
2300          (1 << device->pdevice->memory.memoryTypeCount) - 1;
2301       return VK_SUCCESS;
2302    default:
2303       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2304    }
2305 }
2306 
2307 void pvr_FreeMemory(VkDevice _device,
2308                     VkDeviceMemory _mem,
2309                     const VkAllocationCallbacks *pAllocator)
2310 {
2311    PVR_FROM_HANDLE(pvr_device, device, _device);
2312    PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
2313 
2314    free_memory(device, mem, pAllocator);
2315 }
2316 
2317 VkResult pvr_MapMemory(VkDevice _device,
2318                        VkDeviceMemory _memory,
2319                        VkDeviceSize offset,
2320                        VkDeviceSize size,
2321                        VkMemoryMapFlags flags,
2322                        void **ppData)
2323 {
2324    PVR_FROM_HANDLE(pvr_device, device, _device);
2325    PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2326    VkResult result;
2327 
2328    if (!mem) {
2329       *ppData = NULL;
2330       return VK_SUCCESS;
2331    }
2332 
2333    if (size == VK_WHOLE_SIZE)
2334       size = mem->bo->size - offset;
2335 
2336    /* From the Vulkan spec version 1.0.32 docs for vkMapMemory:
2337     *
2338     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater
2339     *    than 0
2340     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
2341     *    equal to the size of the memory minus offset
2342     */
2343 
2344    assert(size > 0);
2345    assert(offset + size <= mem->bo->size);
2346 
2347    /* Check if already mapped */
2348    if (mem->bo->map) {
2349       *ppData = (uint8_t *)mem->bo->map + offset;
2350       return VK_SUCCESS;
2351    }
2352 
2353    /* Map it all at once */
2354    result = device->ws->ops->buffer_map(mem->bo);
2355    if (result != VK_SUCCESS)
2356       return result;
2357 
2358    *ppData = (uint8_t *)mem->bo->map + offset;
2359 
2360    return VK_SUCCESS;
2361 }
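
/* Offset-handling sketch with hypothetical values: mapping at offset 256
 * with VK_WHOLE_SIZE on a 4096-byte bo maps the whole bo once and returns
 * map_base + 256, with size resolving to 4096 - 256 = 3840 bytes.
 */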
2362 
2363 void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2364 {
2365    PVR_FROM_HANDLE(pvr_device, device, _device);
2366    PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2367 
2368    if (!mem || !mem->bo->map)
2369       return;
2370 
2371    device->ws->ops->buffer_unmap(mem->bo);
2372 }
2373 
2374 VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
2375                                      uint32_t memoryRangeCount,
2376                                      const VkMappedMemoryRange *pMemoryRanges)
2377 {
2378    return VK_SUCCESS;
2379 }
2380 
2381 VkResult
2382 pvr_InvalidateMappedMemoryRanges(VkDevice _device,
2383                                  uint32_t memoryRangeCount,
2384                                  const VkMappedMemoryRange *pMemoryRanges)
2385 {
2386    return VK_SUCCESS;
2387 }
2388 
2389 void pvr_GetImageSparseMemoryRequirements2(
2390    VkDevice device,
2391    const VkImageSparseMemoryRequirementsInfo2 *pInfo,
2392    uint32_t *pSparseMemoryRequirementCount,
2393    VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2394 {
2395    *pSparseMemoryRequirementCount = 0;
2396 }
2397 
2398 void pvr_GetDeviceMemoryCommitment(VkDevice device,
2399                                    VkDeviceMemory memory,
2400                                    VkDeviceSize *pCommittedMemoryInBytes)
2401 {
2402    *pCommittedMemoryInBytes = 0;
2403 }
2404 
2405 VkResult pvr_bind_memory(struct pvr_device *device,
2406                          struct pvr_device_memory *mem,
2407                          VkDeviceSize offset,
2408                          VkDeviceSize size,
2409                          VkDeviceSize alignment,
2410                          struct pvr_winsys_vma **const vma_out,
2411                          pvr_dev_addr_t *const dev_addr_out)
2412 {
2413    VkDeviceSize virt_size =
2414       size + (offset & (device->heaps.general_heap->page_size - 1));
2415    struct pvr_winsys_vma *vma;
2416    pvr_dev_addr_t dev_addr;
2417    VkResult result;
2418 
2419    /* Valid usage:
2420     *
2421     *   "memoryOffset must be an integer multiple of the alignment member of
2422     *    the VkMemoryRequirements structure returned from a call to
2423     *    vkGetBufferMemoryRequirements with buffer"
2424     *
2425     *   "memoryOffset must be an integer multiple of the alignment member of
2426     *    the VkMemoryRequirements structure returned from a call to
2427     *    vkGetImageMemoryRequirements with image"
2428     */
2429    assert(offset % alignment == 0);
2430    assert(offset < mem->bo->size);
2431 
2432    result = device->ws->ops->heap_alloc(device->heaps.general_heap,
2433                                         virt_size,
2434                                         alignment,
2435                                         &vma);
2436    if (result != VK_SUCCESS)
2437       goto err_out;
2438 
2439    result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
2440    if (result != VK_SUCCESS)
2441       goto err_free_vma;
2442 
2443    *dev_addr_out = dev_addr;
2444    *vma_out = vma;
2445 
2446    return VK_SUCCESS;
2447 
2448 err_free_vma:
2449    device->ws->ops->heap_free(vma);
2450 
2451 err_out:
2452    return result;
2453 }
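
/* Worked example of the virtual size computed above, with hypothetical
 * values: for a 4096-byte general-heap page size, binding size = 8000 at
 * offset = 4100 gives virt_size = 8000 + (4100 & 4095) = 8004 bytes, so a
 * mapping that starts mid-page still fits entirely in the reservation.
 */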
2454 
2455 void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
2456 {
2457    device->ws->ops->vma_unmap(vma);
2458    device->ws->ops->heap_free(vma);
2459 }
2460 
2461 VkResult pvr_BindBufferMemory2(VkDevice _device,
2462                                uint32_t bindInfoCount,
2463                                const VkBindBufferMemoryInfo *pBindInfos)
2464 {
2465    PVR_FROM_HANDLE(pvr_device, device, _device);
2466    uint32_t i;
2467 
2468    for (i = 0; i < bindInfoCount; i++) {
2469       PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
2470       PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2471 
2472       VkResult result = pvr_bind_memory(device,
2473                                         mem,
2474                                         pBindInfos[i].memoryOffset,
2475                                         buffer->vk.size,
2476                                         buffer->alignment,
2477                                         &buffer->vma,
2478                                         &buffer->dev_addr);
2479       if (result != VK_SUCCESS) {
2480          while (i--) {
2481             PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2482             pvr_unbind_memory(device, buffer->vma);
2483          }
2484 
2485          return result;
2486       }
2487    }
2488 
2489    return VK_SUCCESS;
2490 }
2491 
2492 VkResult pvr_QueueBindSparse(VkQueue _queue,
2493                              uint32_t bindInfoCount,
2494                              const VkBindSparseInfo *pBindInfo,
2495                              VkFence fence)
2496 {
2497    return VK_SUCCESS;
2498 }
2499 
2500 /* Event functions. */
2501 
2502 VkResult pvr_CreateEvent(VkDevice _device,
2503                          const VkEventCreateInfo *pCreateInfo,
2504                          const VkAllocationCallbacks *pAllocator,
2505                          VkEvent *pEvent)
2506 {
2507    PVR_FROM_HANDLE(pvr_device, device, _device);
2508 
2509    struct pvr_event *event = vk_object_alloc(&device->vk,
2510                                              pAllocator,
2511                                              sizeof(*event),
2512                                              VK_OBJECT_TYPE_EVENT);
2513    if (!event)
2514       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2515 
2516    event->sync = NULL;
2517    event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2518 
2519    *pEvent = pvr_event_to_handle(event);
2520 
2521    return VK_SUCCESS;
2522 }
2523 
2524 void pvr_DestroyEvent(VkDevice _device,
2525                       VkEvent _event,
2526                       const VkAllocationCallbacks *pAllocator)
2527 {
2528    PVR_FROM_HANDLE(pvr_device, device, _device);
2529    PVR_FROM_HANDLE(pvr_event, event, _event);
2530 
2531    if (!event)
2532       return;
2533 
2534    if (event->sync)
2535       vk_sync_destroy(&device->vk, event->sync);
2536 
2537    vk_object_free(&device->vk, pAllocator, event);
2538 }
2539 
2540 VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
2541 {
2542    PVR_FROM_HANDLE(pvr_device, device, _device);
2543    PVR_FROM_HANDLE(pvr_event, event, _event);
2544    VkResult result;
2545 
2546    switch (event->state) {
2547    case PVR_EVENT_STATE_SET_BY_DEVICE:
2548       if (!event->sync)
2549          return VK_EVENT_RESET;
2550 
2551       result =
2552          vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2553       result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
2554       break;
2555 
2556    case PVR_EVENT_STATE_RESET_BY_DEVICE:
2557       if (!event->sync)
2558          return VK_EVENT_RESET;
2559 
2560       result =
2561          vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2562       result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
2563       break;
2564 
2565    case PVR_EVENT_STATE_SET_BY_HOST:
2566       result = VK_EVENT_SET;
2567       break;
2568 
2569    case PVR_EVENT_STATE_RESET_BY_HOST:
2570       result = VK_EVENT_RESET;
2571       break;
2572 
2573    default:
2574       unreachable("Event object in unknown state");
2575    }
2576 
2577    return result;
2578 }
2579 
2580 VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
2581 {
2582    PVR_FROM_HANDLE(pvr_event, event, _event);
2583 
2584    if (event->sync) {
2585       PVR_FROM_HANDLE(pvr_device, device, _device);
2586 
2587       const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
2588       if (result != VK_SUCCESS)
2589          return result;
2590    }
2591 
2592    event->state = PVR_EVENT_STATE_SET_BY_HOST;
2593 
2594    return VK_SUCCESS;
2595 }
2596 
2597 VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
2598 {
2599    PVR_FROM_HANDLE(pvr_event, event, _event);
2600 
2601    if (event->sync) {
2602       PVR_FROM_HANDLE(pvr_device, device, _device);
2603 
2604       const VkResult result = vk_sync_reset(&device->vk, event->sync);
2605       if (result != VK_SUCCESS)
2606          return result;
2607    }
2608 
2609    event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2610 
2611    return VK_SUCCESS;
2612 }
2613 
2614 /* Buffer functions. */
2615 
2616 VkResult pvr_CreateBuffer(VkDevice _device,
2617                           const VkBufferCreateInfo *pCreateInfo,
2618                           const VkAllocationCallbacks *pAllocator,
2619                           VkBuffer *pBuffer)
2620 {
2621    PVR_FROM_HANDLE(pvr_device, device, _device);
2622    const uint32_t alignment = 4096;
2623    struct pvr_buffer *buffer;
2624 
2625    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2626    assert(pCreateInfo->usage != 0);
2627 
2628    /* We check against (ULONG_MAX - alignment) to prevent overflow issues. */
2629    if (pCreateInfo->size >= ULONG_MAX - alignment)
2630       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2631 
2632    buffer =
2633       vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
2634    if (!buffer)
2635       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2636 
2637    buffer->alignment = alignment;
2638 
2639    *pBuffer = pvr_buffer_to_handle(buffer);
2640 
2641    return VK_SUCCESS;
2642 }
2643 
2644 void pvr_DestroyBuffer(VkDevice _device,
2645                        VkBuffer _buffer,
2646                        const VkAllocationCallbacks *pAllocator)
2647 {
2648    PVR_FROM_HANDLE(pvr_device, device, _device);
2649    PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
2650 
2651    if (!buffer)
2652       return;
2653 
2654    if (buffer->vma)
2655       pvr_unbind_memory(device, buffer->vma);
2656 
2657    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
2658 }
2659 
2660 VkResult pvr_gpu_upload(struct pvr_device *device,
2661                         struct pvr_winsys_heap *heap,
2662                         const void *data,
2663                         size_t size,
2664                         uint64_t alignment,
2665                         struct pvr_suballoc_bo **const pvr_bo_out)
2666 {
2667    struct pvr_suballoc_bo *suballoc_bo = NULL;
2668    struct pvr_suballocator *allocator;
2669    VkResult result;
2670    void *map;
2671 
2672    assert(size > 0);
2673 
2674    if (heap == device->heaps.general_heap)
2675       allocator = &device->suballoc_general;
2676    else if (heap == device->heaps.pds_heap)
2677       allocator = &device->suballoc_pds;
2678    else if (heap == device->heaps.transfer_frag_heap)
2679       allocator = &device->suballoc_transfer;
2680    else if (heap == device->heaps.usc_heap)
2681       allocator = &device->suballoc_usc;
2682    else
2683       unreachable("Unknown heap type");
2684 
2685    result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
2686    if (result != VK_SUCCESS)
2687       return result;
2688 
2689    map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2690    memcpy(map, data, size);
2691 
2692    *pvr_bo_out = suballoc_bo;
2693 
2694    return VK_SUCCESS;
2695 }
2696 
2697 VkResult pvr_gpu_upload_usc(struct pvr_device *device,
2698                             const void *code,
2699                             size_t code_size,
2700                             uint64_t code_alignment,
2701                             struct pvr_suballoc_bo **const pvr_bo_out)
2702 {
2703    struct pvr_suballoc_bo *suballoc_bo = NULL;
2704    VkResult result;
2705    void *map;
2706 
2707    assert(code_size > 0);
2708 
2709    /* The USC will prefetch the next instruction, so over-allocate by one
2710     * instruction to prevent reading off the end of a page into a potentially
2711     * unallocated page.
2712     */
2713    result = pvr_bo_suballoc(&device->suballoc_usc,
2714                             code_size + ROGUE_MAX_INSTR_BYTES,
2715                             code_alignment,
2716                             false,
2717                             &suballoc_bo);
2718    if (result != VK_SUCCESS)
2719       return result;
2720 
2721    map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2722    memcpy(map, code, code_size);
2723 
2724    *pvr_bo_out = suballoc_bo;
2725 
2726    return VK_SUCCESS;
2727 }
2728 
2729 /**
2730  * \brief Upload PDS program data and code segments from host memory to device
2731  * memory.
2732  *
2733  * \param[in] device            Logical device pointer.
2734  * \param[in] data              Pointer to PDS data segment to upload.
2735  * \param[in] data_size_dwords  Size of PDS data segment in dwords.
2736  * \param[in] data_alignment    Required alignment of the PDS data segment in
2737  *                              bytes. Must be a power of two.
2738  * \param[in] code              Pointer to PDS code segment to upload.
2739  * \param[in] code_size_dwords  Size of PDS code segment in dwords.
2740  * \param[in] code_alignment    Required alignment of the PDS code segment in
2741  *                              bytes. Must be a power of two.
2742  * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
2743  *                              program in bytes.
2744  * \param[out] pds_upload_out   On success will be initialized based on the
2745  *                              uploaded PDS program.
2746  * \return VK_SUCCESS on success, or error code otherwise.
2747  */
2748 VkResult pvr_gpu_upload_pds(struct pvr_device *device,
2749                             const uint32_t *data,
2750                             uint32_t data_size_dwords,
2751                             uint32_t data_alignment,
2752                             const uint32_t *code,
2753                             uint32_t code_size_dwords,
2754                             uint32_t code_alignment,
2755                             uint64_t min_alignment,
2756                             struct pvr_pds_upload *const pds_upload_out)
2757 {
2758    /* All alignment and sizes below are in bytes. */
2759    const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
2760    const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
2761    const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
2762    const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
2763    const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
2764    const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
2765    const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
2766                                      : data_aligned_size;
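   /* Layout within the bo: the data segment starts at offset 0 and the code
    * segment starts at code_offset, i.e. the aligned data size rounded up
    * further to the code alignment: [data | pad | code].
    */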
2767    VkResult result;
2768    void *map;
2769 
2770    assert(code || data);
2771    assert(!code || (code_size_dwords != 0 && code_alignment != 0));
2772    assert(!data || (data_size_dwords != 0 && data_alignment != 0));
2773 
2774    result = pvr_bo_suballoc(&device->suballoc_pds,
2775                             bo_size,
2776                             bo_alignment,
2777                             true,
2778                             &pds_upload_out->pvr_bo);
2779    if (result != VK_SUCCESS)
2780       return result;
2781 
2782    map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);
2783 
2784    if (data) {
2785       memcpy(map, data, data_size);
2786 
2787       pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
2788                                     device->heaps.pds_heap->base_addr.addr;
2789 
2790       /* Store data size in dwords. */
2791       assert(data_aligned_size % 4 == 0);
2792       pds_upload_out->data_size = data_aligned_size / 4;
2793    } else {
2794       pds_upload_out->data_offset = 0;
2795       pds_upload_out->data_size = 0;
2796    }
2797 
2798    if (code) {
2799       memcpy((uint8_t *)map + code_offset, code, code_size);
2800 
2801       pds_upload_out->code_offset =
2802          (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
2803          device->heaps.pds_heap->base_addr.addr;
2804 
2805       /* Store code size in dwords. */
2806       assert(code_aligned_size % 4 == 0);
2807       pds_upload_out->code_size = code_aligned_size / 4;
2808    } else {
2809       pds_upload_out->code_offset = 0;
2810       pds_upload_out->code_size = 0;
2811    }
2812 
2813    return VK_SUCCESS;
2814 }
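
/* Editorial sketch, not part of the driver: a hedged example of uploading a
 * PDS program with both a data and a code segment. The dword arrays and the
 * alignments are illustrative placeholders. On success the returned offsets
 * are relative to the PDS heap base and the sizes are in dwords, as set up
 * above.
 */
#if 0
static VkResult example_upload_pds(struct pvr_device *device,
                                   struct pvr_pds_upload *upload_out)
{
   static const uint32_t pds_data[4] = { 0 };
   static const uint32_t pds_code[8] = { 0 };

   return pvr_gpu_upload_pds(device,
                             pds_data,
                             ARRAY_SIZE(pds_data),
                             16, /* data_alignment, illustrative */
                             pds_code,
                             ARRAY_SIZE(pds_code),
                             16, /* code_alignment, illustrative */
                             16, /* min_alignment, illustrative */
                             upload_out);
}
#endif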
2815 
2816 static VkResult
2817 pvr_framebuffer_create_ppp_state(struct pvr_device *device,
2818                                  struct pvr_framebuffer *framebuffer)
2819 {
2820    const uint32_t cache_line_size =
2821       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
2822    uint32_t ppp_state[3];
2823    VkResult result;
2824 
2825    pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
2826       header.pres_terminate = true;
2827    }
2828 
2829    pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
2830       term0.clip_right =
2831          DIV_ROUND_UP(
2832             framebuffer->width,
2833             ROGUE_TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS) -
2834          1;
2835       term0.clip_bottom =
2836          DIV_ROUND_UP(
2837             framebuffer->height,
2838             ROGUE_TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS) -
2839          1;
2840    }
2841 
2842    pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
2843       term1.render_target = 0;
2844       term1.clip_left = 0;
2845    }
2846 
2847    result = pvr_gpu_upload(device,
2848                            device->heaps.general_heap,
2849                            ppp_state,
2850                            sizeof(ppp_state),
2851                            cache_line_size,
2852                            &framebuffer->ppp_state_bo);
2853    if (result != VK_SUCCESS)
2854       return result;
2855 
2856    /* Calculate the size of PPP state in dwords. */
2857    framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);
2858 
2859    return VK_SUCCESS;
2860 }
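
/* Worked example for the terminate words packed above (editorial; the
 * 32-pixel block size is an assumption for illustration only -- the real
 * values come from the ROGUE_TA_STATE_TERMINATE0_CLIP_*_BLOCK_SIZE_IN_PIXELS
 * defines): for a 1920x1080 framebuffer,
 *
 *    clip_right  = DIV_ROUND_UP(1920, 32) - 1 = 59
 *    clip_bottom = DIV_ROUND_UP(1080, 32) - 1 = 33
 *
 * i.e. the zero-based index of the last block that covers the framebuffer.
 */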
2861 
2862 static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
2863                                     uint32_t render_targets_count)
2864 {
2865    uint32_t i;
2866 
2867    for (i = 0; i < render_targets_count; i++) {
2868       if (pthread_mutex_init(&render_targets[i].mutex, NULL))
2869          goto err_mutex_destroy;
2870    }
2871 
2872    return true;
2873 
2874 err_mutex_destroy:
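   /* Destroy, in reverse order, only the mutexes that were successfully
    * initialized before entry i failed; the post-decrement walks i - 1
    * down to 0.
    */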
2875    while (i--)
2876       pthread_mutex_destroy(&render_targets[i].mutex);
2877 
2878    return false;
2879 }
2880 
2881 static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
2882                                     uint32_t render_targets_count)
2883 {
2884    for (uint32_t i = 0; i < render_targets_count; i++) {
2885       if (render_targets[i].valid) {
2886          pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
2887          render_targets[i].valid = false;
2888       }
2889 
2890       pthread_mutex_destroy(&render_targets[i].mutex);
2891    }
2892 }
2893 
2894 VkResult pvr_CreateFramebuffer(VkDevice _device,
2895                                const VkFramebufferCreateInfo *pCreateInfo,
2896                                const VkAllocationCallbacks *pAllocator,
2897                                VkFramebuffer *pFramebuffer)
2898 {
2899    PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
2900    PVR_FROM_HANDLE(pvr_device, device, _device);
2901    struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
2902    struct pvr_spm_eot_state *spm_eot_state_per_render;
2903    struct pvr_render_target *render_targets;
2904    struct pvr_framebuffer *framebuffer;
2905    struct pvr_image_view **attachments;
2906    uint32_t render_targets_count;
2907    uint64_t scratch_buffer_size;
2908    VkResult result;
2909 
2910    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2911 
2912    render_targets_count =
2913       PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);
2914 
2915    VK_MULTIALLOC(ma);
2916    vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
2917    vk_multialloc_add(&ma,
2918                      &attachments,
2919                      __typeof__(*attachments),
2920                      pCreateInfo->attachmentCount);
2921    vk_multialloc_add(&ma,
2922                      &render_targets,
2923                      __typeof__(*render_targets),
2924                      render_targets_count);
2925    vk_multialloc_add(&ma,
2926                      &spm_eot_state_per_render,
2927                      __typeof__(*spm_eot_state_per_render),
2928                      pass->hw_setup->render_count);
2929    vk_multialloc_add(&ma,
2930                      &spm_bgobj_state_per_render,
2931                      __typeof__(*spm_bgobj_state_per_render),
2932                      pass->hw_setup->render_count);
2933 
2934    if (!vk_multialloc_zalloc2(&ma,
2935                               &device->vk.alloc,
2936                               pAllocator,
2937                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
2938       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2939 
2940    vk_object_base_init(&device->vk,
2941                        &framebuffer->base,
2942                        VK_OBJECT_TYPE_FRAMEBUFFER);
2943 
2944    framebuffer->width = pCreateInfo->width;
2945    framebuffer->height = pCreateInfo->height;
2946    framebuffer->layers = pCreateInfo->layers;
2947 
2948    framebuffer->attachments = attachments;
2949    framebuffer->attachment_count = pCreateInfo->attachmentCount;
2950    for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
2951       framebuffer->attachments[i] =
2952          pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
2953    }
2954 
2955    result = pvr_framebuffer_create_ppp_state(device, framebuffer);
2956    if (result != VK_SUCCESS)
2957       goto err_free_framebuffer;
2958 
2959    framebuffer->render_targets = render_targets;
2960    framebuffer->render_targets_count = render_targets_count;
2961    if (!pvr_render_targets_init(framebuffer->render_targets,
2962                                 render_targets_count)) {
2963       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2964       goto err_free_ppp_state_bo;
2965    }
2966 
2967    scratch_buffer_size =
2968       pvr_spm_scratch_buffer_calc_required_size(pass,
2969                                                 framebuffer->width,
2970                                                 framebuffer->height);
2971 
2972    result = pvr_spm_scratch_buffer_get_buffer(device,
2973                                               scratch_buffer_size,
2974                                               &framebuffer->scratch_buffer);
2975    if (result != VK_SUCCESS)
2976       goto err_finish_render_targets;
2977 
2978    for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
2979       uint32_t emit_count;
2980 
2981       result = pvr_spm_init_eot_state(device,
2982                                       &spm_eot_state_per_render[i],
2983                                       framebuffer,
2984                                       &pass->hw_setup->renders[i],
2985                                       &emit_count);
2986       if (result != VK_SUCCESS)
2987          goto err_finish_eot_state;
2988 
2989       result = pvr_spm_init_bgobj_state(device,
2990                                         &spm_bgobj_state_per_render[i],
2991                                         framebuffer,
2992                                         &pass->hw_setup->renders[i],
2993                                         emit_count);
2994       if (result != VK_SUCCESS)
2995          goto err_finish_bgobj_state;
2996 
2997       continue;
2998 
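      /* Not reached on success (note the continue above): these in-loop
       * labels unwind the EOT and background-object state created by earlier
       * iterations, plus the current partially-initialized render, before
       * joining the common error path below.
       */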
2999 err_finish_bgobj_state:
3000       pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
3001 
3002       for (uint32_t j = 0; j < i; j++)
3003          pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
3004 
3005 err_finish_eot_state:
3006       for (uint32_t j = 0; j < i; j++)
3007          pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
3008 
3009       goto err_finish_render_targets;
3010    }
3011 
3012    framebuffer->render_count = pass->hw_setup->render_count;
3013    framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
3014    framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
3015 
3016    *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
3017 
3018    return VK_SUCCESS;
3019 
3020 err_finish_render_targets:
3021    pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);
3022 
3023 err_free_ppp_state_bo:
3024    pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
3025 
3026 err_free_framebuffer:
3027    vk_object_base_finish(&framebuffer->base);
3028    vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3029 
3030    return result;
3031 }
3032 
3033 void pvr_DestroyFramebuffer(VkDevice _device,
3034                             VkFramebuffer _fb,
3035                             const VkAllocationCallbacks *pAllocator)
3036 {
3037    PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
3038    PVR_FROM_HANDLE(pvr_device, device, _device);
3039 
3040    if (!framebuffer)
3041       return;
3042 
3043    for (uint32_t i = 0; i < framebuffer->render_count; i++) {
3044       pvr_spm_finish_bgobj_state(device,
3045                                  &framebuffer->spm_bgobj_state_per_render[i]);
3046 
3047       pvr_spm_finish_eot_state(device,
3048                                &framebuffer->spm_eot_state_per_render[i]);
3049    }
3050 
3051    pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
3052    pvr_render_targets_fini(framebuffer->render_targets,
3053                            framebuffer->render_targets_count);
3054    pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
3055    vk_object_base_finish(&framebuffer->base);
3056    vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3057 }
3058 
3059 static uint32_t
3060 pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
3061                                   VkFilter filter)
3062 {
3063    switch (filter) {
3064    case VK_FILTER_NEAREST:
3065       return ROGUE_TEXSTATE_FILTER_POINT;
3066    case VK_FILTER_LINEAR:
3067       return ROGUE_TEXSTATE_FILTER_LINEAR;
3068    default:
3069       unreachable("Unknown filter type.");
3070    }
3071 }
3072 
3073 static uint32_t
3074 pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
3075 {
3076    switch (addr_mode) {
3077    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3078       return ROGUE_TEXSTATE_ADDRMODE_REPEAT;
3079    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3080       return ROGUE_TEXSTATE_ADDRMODE_FLIP;
3081    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3082       return ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
3083    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3084       return ROGUE_TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP;
3085    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3086       return ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_BORDER;
3087    default:
3088       unreachable("Invalid sampler address mode.");
3089    }
3090 }
3091 
3092 VkResult pvr_CreateSampler(VkDevice _device,
3093                            const VkSamplerCreateInfo *pCreateInfo,
3094                            const VkAllocationCallbacks *pAllocator,
3095                            VkSampler *pSampler)
3096 {
3097    PVR_FROM_HANDLE(pvr_device, device, _device);
3098    uint32_t border_color_table_index;
3099    struct pvr_sampler *sampler;
3100    float lod_rounding_bias;
3101    VkFilter min_filter;
3102    VkFilter mag_filter;
3103    VkResult result;
3104    float min_lod;
3105    float max_lod;
3106 
3107    STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
3108                  sizeof(((union pvr_sampler_descriptor *)NULL)->words));
3109 
3110    sampler =
3111       vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
3112    if (!sampler) {
3113       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3114       goto err_out;
3115    }
3116 
3117    mag_filter = pCreateInfo->magFilter;
3118    min_filter = pCreateInfo->minFilter;
3119 
3120    result =
3121       pvr_border_color_table_get_or_create_entry(&device->border_color_table,
3122                                                  sampler,
3123                                                  &border_color_table_index);
3124    if (result != VK_SUCCESS)
3125       goto err_free_sampler;
3126 
3127    if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
3128       /* The min/mag filters may need adjustment here; the GPU should decide
3129        * which of the two filters to use based on the clamped LOD value: LOD
3130        * <= 0 implies magnification, while LOD > 0 implies minification.
3131        *
3132        * As a workaround, we override magFilter with minFilter if we know that
3133        * the magnification filter will never be used due to clamping anyway
3134        * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
3135        * if maxLod <= 0.
3136        */
3137       if (pCreateInfo->minLod > 0.0f) {
3138          /* The clamped LOD will always be positive => always minify. */
3139          mag_filter = pCreateInfo->minFilter;
3140       }
3141 
3142       if (pCreateInfo->maxLod <= 0.0f) {
3143          /* The clamped LOD will always be negative or zero => always
3144           * magnify.
3145           */
3146          min_filter = pCreateInfo->magFilter;
3147       }
3148    }
3149 
3150    if (pCreateInfo->compareEnable) {
3151       sampler->descriptor.data.compare_op =
3152          (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
3153    } else {
3154       sampler->descriptor.data.compare_op =
3155          (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
3156    }
3157 
3158    sampler->descriptor.data.word3 = 0;
3159    pvr_csb_pack (&sampler->descriptor.data.sampler_word,
3160                  TEXSTATE_SAMPLER,
3161                  word) {
3162       const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
3163       const float lod_clamp_max = (float)ROGUE_TEXSTATE_CLAMP_MAX /
3164                                   (1 << ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);
3165       const float max_dadjust = ((float)(ROGUE_TEXSTATE_DADJUST_MAX_UINT -
3166                                          ROGUE_TEXSTATE_DADJUST_ZERO_UINT)) /
3167                                 (1 << ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);
3168       const float min_dadjust = ((float)(ROGUE_TEXSTATE_DADJUST_MIN_UINT -
3169                                          ROGUE_TEXSTATE_DADJUST_ZERO_UINT)) /
3170                                 (1 << ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);
3171 
3172       word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
3173       word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);
3174 
3175       if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
3176          word.mipfilter = true;
3177 
3178       word.addrmode_u =
3179          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
3180       word.addrmode_v =
3181          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
3182       word.addrmode_w =
3183          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);
3184 
3185       /* TODO: Figure out defines for these. */
3186       if (word.addrmode_u == ROGUE_TEXSTATE_ADDRMODE_FLIP)
3187          sampler->descriptor.data.word3 |= 0x40000000;
3188 
3189       if (word.addrmode_v == ROGUE_TEXSTATE_ADDRMODE_FLIP)
3190          sampler->descriptor.data.word3 |= 0x20000000;
3191 
3192       /* The Vulkan 1.0.205 spec says:
3193        *
3194        *    The absolute value of mipLodBias must be less than or equal to
3195        *    VkPhysicalDeviceLimits::maxSamplerLodBias.
3196        */
3197       word.dadjust =
3198          ROGUE_TEXSTATE_DADJUST_ZERO_UINT +
3199          util_signed_fixed(
3200             CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
3201             ROGUE_TEXSTATE_DADJUST_FRACTIONAL_BITS);
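
      /* Illustrative arithmetic (editorial): util_signed_fixed(x, n) scales x
       * by 2^n, so with n fractional bits a mipLodBias of 0.5 encodes as
       * ROGUE_TEXSTATE_DADJUST_ZERO_UINT + (1 << (n - 1)), and the CLAMP
       * keeps the bias within the representable dadjust range.
       */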
3202 
3203       /* Anisotropy is not supported for now. */
3204       word.anisoctl = ROGUE_TEXSTATE_ANISOCTL_DISABLED;
3205 
3206       if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
3207           pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
3208          /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
3209           * selected by adding 0.5 and then truncating the input LOD value.
3210           * This hardware adds the 0.5 bias before clamping against
3211           * lodmin/lodmax, while Vulkan specifies the bias to be added after
3212           * clamping. We compensate for this difference by adding the 0.5
3213           * bias to the LOD bounds, too.
3214           */
3215          lod_rounding_bias = 0.5f;
3216       } else {
3217          lod_rounding_bias = 0.0f;
3218       }
3219 
3220       min_lod = pCreateInfo->minLod + lod_rounding_bias;
3221       word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
3222                                         ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);
3223 
3224       max_lod = pCreateInfo->maxLod + lod_rounding_bias;
3225       word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
3226                                         ROGUE_TEXSTATE_CLAMP_FRACTIONAL_BITS);
3227 
3228       word.bordercolor_index = border_color_table_index;
3229 
3230       if (pCreateInfo->unnormalizedCoordinates)
3231          word.non_normalized_coords = true;
3232    }
3233 
3234    *pSampler = pvr_sampler_to_handle(sampler);
3235 
3236    return VK_SUCCESS;
3237 
3238 err_free_sampler:
3239    vk_object_free(&device->vk, pAllocator, sampler);
3240 
3241 err_out:
3242    return result;
3243 }
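
/* Editorial sketch, not part of the driver: an application-side create info
 * that would exercise the BRN 51025 workaround above. With minLod > 0 the
 * clamped LOD is always positive, so the driver substitutes minFilter for
 * magFilter. All field values are illustrative.
 */
#if 0
static const VkSamplerCreateInfo example_sampler_info = {
   .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
   .magFilter = VK_FILTER_NEAREST, /* Never used when minLod > 0. */
   .minFilter = VK_FILTER_LINEAR,
   .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
   .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
   .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
   .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
   .minLod = 1.0f, /* Clamped LOD always > 0 => always minification. */
   .maxLod = VK_LOD_CLAMP_NONE,
};
#endif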
3244 
3245 void pvr_DestroySampler(VkDevice _device,
3246                         VkSampler _sampler,
3247                         const VkAllocationCallbacks *pAllocator)
3248 {
3249    PVR_FROM_HANDLE(pvr_device, device, _device);
3250    PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);
3251 
3252    if (!sampler)
3253       return;
3254 
3255    vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
3256 }
3257 
3258 void pvr_GetBufferMemoryRequirements2(
3259    VkDevice _device,
3260    const VkBufferMemoryRequirementsInfo2 *pInfo,
3261    VkMemoryRequirements2 *pMemoryRequirements)
3262 {
3263    PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
3264    PVR_FROM_HANDLE(pvr_device, device, _device);
3265    uint64_t size;
3266 
3267    /* The Vulkan 1.0.166 spec says:
3268     *
3269     *    memoryTypeBits is a bitmask and contains one bit set for every
3270     *    supported memory type for the resource. Bit 'i' is set if and only
3271     *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3272     *    structure for the physical device is supported for the resource.
3273     *
3274     * All types are currently supported for buffers.
3275     */
3276    pMemoryRequirements->memoryRequirements.memoryTypeBits =
3277       (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3278 
3279    pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
3280 
3281    size = buffer->vk.size;
3282 
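   /* Pad the reported size when the buffer either ends exactly on a page
    * boundary or leaves less than PVR_BUFFER_MEMORY_PADDING_SIZE bytes of
    * slack in its final page, so the padding region is always backed by the
    * allocation.
    */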
3283    if (size % device->ws->page_size == 0 ||
3284        size % device->ws->page_size >
3285           device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
3286       /* TODO: We can save memory by having one extra virtual page mapped
3287        * in and having the first and last virtual page mapped to the first
3288        * physical address.
3289        */
3290       size += PVR_BUFFER_MEMORY_PADDING_SIZE;
3291    }
3292 
3293    pMemoryRequirements->memoryRequirements.size =
3294       ALIGN_POT(size, buffer->alignment);
3295 }
3296 
3297 void pvr_GetImageMemoryRequirements2(VkDevice _device,
3298                                      const VkImageMemoryRequirementsInfo2 *pInfo,
3299                                      VkMemoryRequirements2 *pMemoryRequirements)
3300 {
3301    PVR_FROM_HANDLE(pvr_device, device, _device);
3302    PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
3303 
3304    /* The Vulkan 1.0.166 spec says:
3305     *
3306     *    memoryTypeBits is a bitmask and contains one bit set for every
3307     *    supported memory type for the resource. Bit 'i' is set if and only
3308     *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3309     *    structure for the physical device is supported for the resource.
3310     *
3311     * All types are currently supported for images.
3312     */
3313    const uint32_t memory_types =
3314       (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3315 
3316    /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
3317     * in GetImageMemoryRequirements()), but this should be known at image
3318     * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
3319     * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
3320     * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
3321     * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
3322     *
3323     * Note: Presumably the 4096 alignment requirement comes from the Vulkan
3324     * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
3325     * render and compute jobs.
3326     */
3327    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
3328    pMemoryRequirements->memoryRequirements.size =
3329       align64(image->size, image->alignment);
3330    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3331 }
3332