/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "git_sha1.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_border.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_dump_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_robustness.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_uscgen.h"
#include "pvr_util.h"
#include "pvr_winsys.h"
#include "rogue/rogue.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/macros.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_extensions.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_physical_device_features.h"
#include "vk_physical_device_properties.h"
#include "vk_sampler.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* After PVR_SECONDARY_DEVICE_THRESHOLD devices have been created per
 * instance, subsequent devices get a smaller initial global free list size,
 * since this use case usually implies smaller amounts of work spread across
 * devices. The free list can still grow as required.
 */
#define PVR_SECONDARY_DEVICE_THRESHOLD (4U)
#define PVR_SECONDARY_DEVICE_FREE_LIST_INITIAL_SIZE (512U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
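/* For illustration only: against the 2 MiB initial global free list size,
 * exactly 12.5% would be 256 KiB, while the integer 13% used here comes to
 * roughly 266 KiB.
 */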

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM_DISPLAY true
#else
#   define PVR_USE_WSI_PLATFORM_DISPLAY false
#endif

#if PVR_USE_WSI_PLATFORM_DISPLAY
#   define PVR_USE_WSI_PLATFORM true
#else
#   define PVR_USE_WSI_PLATFORM false
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

/* Amount of padding required for VkBuffers to ensure we don't read beyond
 * a page boundary.
 */
#define PVR_BUFFER_MEMORY_PADDING_SIZE 4

/* Default size in bytes used by pvr_CreateDevice() for setting up the
 * suballoc_general, suballoc_pds and suballoc_usc suballocators.
 *
 * TODO: Investigate if a different default size can improve the overall
 * performance of internal driver allocations.
 */
#define PVR_SUBALLOCATOR_GENERAL_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_PDS_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_TRANSFER_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_USC_SIZE (128 * 1024)
#define PVR_SUBALLOCATOR_VIS_TEST_SIZE (128 * 1024)

struct pvr_drm_device_config {
   struct pvr_drm_device_info {
      const char *name;
      size_t len;
   } render, display;
};

#define DEF_CONFIG(render_, display_)                               \
   {                                                                \
      .render = { .name = render_, .len = sizeof(render_) - 1 },    \
      .display = { .name = display_, .len = sizeof(display_) - 1 }, \
   }
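/* Note: .len is the literal's length without the NUL terminator, so the
 * strncmp() in pvr_drm_device_compatible() below effectively performs a
 * prefix match against each device tree "compatible" entry.
 */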

/* This is the list of supported DRM render/display driver configs. */
static const struct pvr_drm_device_config pvr_drm_configs[] = {
   DEF_CONFIG("mediatek,mt8173-gpu", "mediatek-drm"),
   DEF_CONFIG("ti,am62-gpu", "ti,am625-dss"),
};

#undef DEF_CONFIG

static const struct vk_instance_extension_table pvr_instance_extensions = {
   .KHR_display = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_display_properties2 = PVR_USE_WSI_PLATFORM_DISPLAY,
   .KHR_get_physical_device_properties2 = true,
   .KHR_get_surface_capabilities2 = PVR_USE_WSI_PLATFORM,
   .KHR_surface = PVR_USE_WSI_PLATFORM,
#ifndef VK_USE_PLATFORM_WIN32_KHR
   .EXT_headless_surface = PVR_USE_WSI_PLATFORM,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};

static void pvr_physical_device_get_supported_extensions(
   struct vk_device_extension_table *extensions)
{
   *extensions = (struct vk_device_extension_table){
      .KHR_bind_memory2 = true,
      .KHR_copy_commands2 = true,
      /* TODO: Enable this extension when the conformance tests are updated
       * to version 1.3.6.0; the current version does not include the
       * Imagination driver ID, which makes a dEQP test fail.
       */
      .KHR_driver_properties = false,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_format_feature_flags2 = true,
      .KHR_external_semaphore = PVR_USE_WSI_PLATFORM,
      .KHR_external_semaphore_fd = PVR_USE_WSI_PLATFORM,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_shader_expect_assume = true,
      .KHR_swapchain = PVR_USE_WSI_PLATFORM,
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_host_query_reset = true,
      .EXT_memory_budget = true,
      .EXT_private_data = true,
      .EXT_scalar_block_layout = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_tooling_info = true,
   };
}

static void pvr_physical_device_get_supported_features(
   const struct pvr_device_info *const dev_info,
   struct vk_features *const features)
{
   *features = (struct vk_features){
      /* Vulkan 1.0 */
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = false,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = false,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = false,
      .samplerAnisotropy = false,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = false,
      .occlusionQueryPrecise = false,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = false,
      .shaderCullDistance = false,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .timelineSemaphore = true,

      /* Vulkan 1.2 / VK_KHR_uniform_buffer_standard_layout */
      .uniformBufferStandardLayout = true,

      /* Vulkan 1.2 / VK_EXT_host_query_reset */
      .hostQueryReset = true,

      /* Vulkan 1.3 / VK_EXT_private_data */
      .privateData = true,

      /* Vulkan 1.2 / VK_EXT_scalar_block_layout */
      .scalarBlockLayout = true,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .texelBufferAlignment = true,

      /* VK_KHR_shader_expect_assume */
      .shaderExpectAssume = true,
   };
}

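/* Derives pipelineCacheUUID by SHA-1 hashing the driver's build-id together
 * with the device's packed BVNC, so the UUID changes whenever either the
 * driver binary or the target GPU does. Returns false if a usable
 * (SHA-1 sized) build-id cannot be found.
 */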
static bool pvr_physical_device_init_pipeline_cache_uuid(
   const struct pvr_device_info *const dev_info,
   uint8_t pipeline_cache_uuid_out[const static VK_UUID_SIZE])
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_pipeline_cache_uuid);
   if (!note) {
      mesa_loge("Failed to find build-id");
      return false;
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      mesa_loge("Build-id too short. It needs to be a SHA-1 hash.");
      return false;
   }

   bvnc = pvr_get_packed_bvnc(dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pipeline_cache_uuid_out, sha1, VK_UUID_SIZE);

   return true;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

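/* Picks a descriptor limits tier based on how much common store space the
 * core makes available to fragment shaders. Larger cores get limits closer
 * to desktop parts; cores below the smallest tier (CS408, 7XE) abort since
 * they are not expected to meet the Vulkan spec minimums.
 */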
static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid-range Rogue XE cores. */
      CS2048, /* Low-end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(dev_info,
                                                 dev_runtime_info,
                                                 UINT32_MAX,
                                                 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1526) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

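/* Fills in the VkPhysicalDevice limits and properties. Most values are
 * either fixed spec minimums or derived from core features; where a feature
 * value is absent from the device info, the minimum across all known cores
 * is used as a safe default.
 */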
static bool pvr_physical_device_get_properties(
   const struct pvr_device_info *const dev_info,
   const struct pvr_device_runtime_info *const dev_runtime_info,
   struct vk_properties *const properties)
{
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(dev_info, dev_runtime_info);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks = PVR_GET_FEATURE_VALUE(dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(dev_info, simple_internal_parameter_format) ? 4U : 8U;

   const uint32_t max_render_size = rogue_get_render_size_max(dev_info);

   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocation count is limited by the case where we have a
    * compute barrier: each slot has a fixed number of invocations, so the
    * whole workgroup may need to span multiple slots. As each slot will WAIT
    * at the barrier until the last invocation completes, all of them have to
    * be schedulable at the same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    *    .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots = PVR_GET_FEATURE_VALUE(dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(dev_info, max_instances_per_pds_task, 32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;

   bool ret;

   *properties = (struct vk_properties){
      /* Vulkan 1.0 */
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = dev_info->ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      /* deviceName and pipelineCacheUUID are filled below. */

      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = PVR_MAX_TEXTURE_EXTENT_Z,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = PVR_MAX_ARRAY_LAYERS,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,
      /* Maximum number of descriptor sets that can be bound simultaneously. */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,
      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,
      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_UNIFORM_DYNAMIC_BUFFERS,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic =
         PVR_MAX_DESCRIPTOR_SET_STORAGE_DYNAMIC_BUFFERS,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = 64U,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,

      /* Vulkan 1.2 / VK_KHR_driver_properties */
      .driverID = VK_DRIVER_ID_IMAGINATION_OPEN_SOURCE_MESA,
      .driverName = "Imagination open-source Mesa driver",
      .driverInfo = "Mesa " PACKAGE_VERSION MESA_GIT_SHA1,
      .conformanceVersion = {
         .major = 1,
         .minor = 3,
         .subminor = 4,
         .patch = 1,
      },

      /* Vulkan 1.2 / VK_KHR_timeline_semaphore */
      .maxTimelineSemaphoreValueDifference = UINT64_MAX,

      /* Vulkan 1.3 / VK_EXT_texel_buffer_alignment */
      .storageTexelBufferOffsetAlignmentBytes = 16,
      .storageTexelBufferOffsetSingleTexelAlignment = true,
      .uniformTexelBufferOffsetAlignmentBytes = 16,
      .uniformTexelBufferOffsetSingleTexelAlignment = false,
   };

   snprintf(properties->deviceName,
            sizeof(properties->deviceName),
            "Imagination PowerVR %s %s",
            dev_info->ident.series_name,
            dev_info->ident.public_name);

   ret = pvr_physical_device_init_pipeline_cache_uuid(
      dev_info,
      properties->pipelineCacheUUID);
   if (!ret)
      return false;

   return true;
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

static void pvr_physical_device_destroy(struct vk_physical_device *vk_pdevice)
{
   struct pvr_physical_device *pdevice =
      container_of(vk_pdevice, struct pvr_physical_device, vk);

   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure to check for NULL
    * before freeing, or to only use freeing functions that accept NULL
    * pointers.
    */

   if (pdevice->compiler)
      ralloc_free(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
   vk_free(&pdevice->vk.instance->alloc, pdevice->display_path);

   vk_physical_device_finish(&pdevice->vk);

   vk_free(&pdevice->vk.instance->alloc, pdevice);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

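/* Size of the single device-local heap we expose, derived from total system
 * RAM: at most half when the system has 4 GiB or less, otherwise three
 * quarters (e.g. 8 GiB of RAM yields a 6 GiB heap).
 */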
static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4GiB
    * or less, we use at most half. If they have more than 4GiB, we use 3/4.
    */
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

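/* One-time physical device setup: duplicates the render/display node paths,
 * creates the winsys, queries device info, and registers the supported
 * extensions, features and properties with the common Vulkan runtime. Note
 * the PVR_I_WANT_A_BROKEN_VULKAN_DRIVER environment variable gate below;
 * without it the device is reported as incompatible.
 */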
static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_display_device)
{
   struct vk_physical_device_dispatch_table dispatch_table;
   struct vk_device_extension_table supported_extensions;
   struct vk_properties supported_properties;
   struct vk_features supported_features;
   struct pvr_winsys *ws;
   char *display_path;
   char *render_path;
   VkResult result;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   render_path = vk_strdup(&instance->vk.alloc,
                           drm_render_device->nodes[DRM_NODE_RENDER],
                           VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_out;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      display_path = vk_strdup(&instance->vk.alloc,
                               drm_display_device->nodes[DRM_NODE_PRIMARY],
                               VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!display_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_vk_free_render_path;
      }
   } else {
      display_path = NULL;
   }

   result =
      pvr_winsys_create(render_path, display_path, &instance->vk.alloc, &ws);
   if (result != VK_SUCCESS)
      goto err_vk_free_display_path;

   pdevice->instance = instance;
   pdevice->render_path = render_path;
   pdevice->display_path = display_path;
   pdevice->ws = ws;

   result = ws->ops->device_info_init(ws,
                                      &pdevice->dev_info,
                                      &pdevice->dev_runtime_info);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pvr_physical_device_get_supported_extensions(&supported_extensions);
   pvr_physical_device_get_supported_features(&pdevice->dev_info,
                                              &supported_features);
   if (!pvr_physical_device_get_properties(&pdevice->dev_info,
                                           &pdevice->dev_runtime_info,
                                           &supported_properties)) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to collect physical device properties");
      goto err_pvr_winsys_destroy;
   }

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &supported_features,
                                    &supported_properties,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   pdevice->vk.supported_sync_types = ws->sync_types;

   /* Set up available memory heaps and types. */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_vk_physical_device_finish;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(ws);

err_vk_free_display_path:
   vk_free(&instance->vk.alloc, display_path);

err_vk_free_render_path:
   vk_free(&instance->vk.alloc, render_path);

err_out:
   return result;
}

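/* Thin wrapper over drmGetDevices2() that converts errors to VkResult. As
 * with drmGetDevices2(), call it once with devices == NULL to get the device
 * count, then again with an array of at least that size to fill it in.
 */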
static VkResult pvr_get_drm_devices(void *const obj,
                                    drmDevicePtr *const devices,
                                    const int max_devices,
                                    int *const num_devices_out)
{
   int ret = drmGetDevices2(0, devices, max_devices);
   if (ret < 0) {
      return vk_errorf(obj,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to enumerate drm devices (errno %d: %s)",
                       -ret,
                       strerror(-ret));
   }

   if (num_devices_out)
      *num_devices_out = ret;

   return VK_SUCCESS;
}

static bool
pvr_drm_device_compatible(const struct pvr_drm_device_info *const info,
                          drmDevice *const drm_dev)
{
   char **const compatible = drm_dev->deviceinfo.platform->compatible;

   for (char **compat = compatible; *compat; compat++) {
      if (strncmp(*compat, info->name, info->len) == 0)
         return true;
   }

   return false;
}

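/* Returns the config whose render entry matches one of the platform device's
 * compatible strings, or NULL if this is not a GPU we support.
 */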
static const struct pvr_drm_device_config *
pvr_drm_device_get_config(drmDevice *const drm_dev)
{
   for (size_t i = 0U; i < ARRAY_SIZE(pvr_drm_configs); i++) {
      if (pvr_drm_device_compatible(&pvr_drm_configs[i].render, drm_dev))
         return &pvr_drm_configs[i];
   }

   return NULL;
}

static void
pvr_physical_device_dump_info(const struct pvr_physical_device *pdevice,
                              char *const *comp_display,
                              char *const *comp_render)
{
   drmVersionPtr version_display, version_render;
   struct pvr_device_dump_info info;

   version_display = drmGetVersion(pdevice->ws->display_fd);
   if (!version_display)
      return;

   version_render = drmGetVersion(pdevice->ws->render_fd);
   if (!version_render) {
      drmFreeVersion(version_display);
      return;
   }

   info.device_info = &pdevice->dev_info;
   info.device_runtime_info = &pdevice->dev_runtime_info;
   info.drm_display.patchlevel = version_display->version_patchlevel;
   info.drm_display.major = version_display->version_major;
   info.drm_display.minor = version_display->version_minor;
   info.drm_display.name = version_display->name;
   info.drm_display.date = version_display->date;
   info.drm_display.comp = comp_display;
   info.drm_render.patchlevel = version_render->version_patchlevel;
   info.drm_render.major = version_render->version_major;
   info.drm_render.minor = version_render->version_minor;
   info.drm_render.name = version_render->name;
   info.drm_render.date = version_render->date;
   info.drm_render.comp = comp_render;

   pvr_dump_physical_device_info(&info);

   drmFreeVersion(version_display);
   drmFreeVersion(version_render);
}

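/* Enumerates DRM platform devices looking for a supported render node and
 * its matching display node. Note that the "no compatible device found"
 * paths deliberately return VK_SUCCESS with no physical device added, rather
 * than an error, so instance creation still succeeds on unsupported systems.
 */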
static VkResult
pvr_physical_device_enumerate(struct vk_instance *const vk_instance)
{
   struct pvr_instance *const instance =
      container_of(vk_instance, struct pvr_instance, vk);

   const struct pvr_drm_device_config *config = NULL;

   drmDevicePtr drm_display_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   struct pvr_physical_device *pdevice;
   drmDevicePtr *drm_devices;
   int num_drm_devices = 0;
   VkResult result;

   result = pvr_get_drm_devices(instance, NULL, 0, &num_drm_devices);
   if (result != VK_SUCCESS)
      goto out;

   if (num_drm_devices == 0) {
      result = VK_SUCCESS;
      goto out;
   }

   drm_devices = vk_alloc(&vk_instance->alloc,
                          sizeof(*drm_devices) * num_drm_devices,
                          8,
                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!drm_devices) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out;
   }

   result = pvr_get_drm_devices(instance, drm_devices, num_drm_devices, NULL);
   if (result != VK_SUCCESS)
      goto out_free_drm_device_ptrs;

   /* First search for our render node... */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (drm_dev->bustype != DRM_BUS_PLATFORM)
         continue;

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_RENDER)))
         continue;

      config = pvr_drm_device_get_config(drm_dev);
      if (config) {
         drm_render_device = drm_dev;
         break;
      }
   }

   if (!config) {
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible render device '%s'.",
             drm_render_device->nodes[DRM_NODE_RENDER]);

   /* ...then find the compatible display node. */
   for (int i = 0; i < num_drm_devices; i++) {
      drmDevice *const drm_dev = drm_devices[i];

      if (!(drm_dev->available_nodes & BITFIELD_BIT(DRM_NODE_PRIMARY)))
         continue;

      if (pvr_drm_device_compatible(&config->display, drm_dev)) {
         drm_display_device = drm_dev;
         break;
      }
   }

   if (!drm_display_device) {
      mesa_loge("Render device '%s' has no compatible display device.",
                drm_render_device->nodes[DRM_NODE_RENDER]);
      result = VK_SUCCESS;
      goto out_free_drm_devices;
   }

   mesa_logd("Found compatible display device '%s'.",
             drm_display_device->nodes[DRM_NODE_PRIMARY]);

   pdevice = vk_zalloc(&vk_instance->alloc,
                       sizeof(*pdevice),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto out_free_drm_devices;
   }

   result = pvr_physical_device_init(pdevice,
                                     instance,
                                     drm_render_device,
                                     drm_display_device);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;

      goto err_free_pdevice;
   }

   if (PVR_IS_DEBUG_SET(INFO)) {
      pvr_physical_device_dump_info(
         pdevice,
         drm_display_device->deviceinfo.platform->compatible,
         drm_render_device->deviceinfo.platform->compatible);
   }

   list_add(&pdevice->vk.link, &vk_instance->physical_devices.list);

   result = VK_SUCCESS;
   goto out_free_drm_devices;

err_free_pdevice:
   vk_free(&vk_instance->alloc, pdevice);

out_free_drm_devices:
   drmFreeDevices(drm_devices, num_drm_devices);

out_free_drm_device_ptrs:
   vk_free(&vk_instance->alloc, drm_devices);

out:
   return result;
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return result;
   }

   pvr_process_debug_variable();

   instance->active_device_count = 0;

   instance->vk.physical_devices.enumerate = pvr_physical_device_enumerate;
   instance->vk.physical_devices.destroy = pvr_physical_device_destroy;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

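/* Returns the number of simultaneous shared-register allocations the core
 * can make. On S8XE cores this is the raster pipe count; otherwise a single
 * phantom is assumed, and four or more clusters give one allocation, two
 * clusters give two, and a single cluster gives four.
 */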
static uint32_t pvr_get_simultaneous_num_allocs(
   const struct pvr_device_info *dev_info,
   ASSERTED const struct pvr_device_runtime_info *dev_runtime_info)
{
   uint32_t min_cluster_per_phantom;

   if (PVR_HAS_FEATURE(dev_info, s8xe))
      return PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);

   assert(dev_runtime_info->num_phantoms == 1);
   min_cluster_per_phantom = PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);

   if (min_cluster_per_phantom >= 4)
      return 1;
   else if (min_cluster_per_phantom == 2)
      return 2;
   else
      return 4;
}

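/* Dual-purpose helper. With fs_common_size == UINT32_MAX it returns the
 * largest per-allocation common store size (in shared registers) that still
 * allows min_tiles_in_flight tiles; with fs_common_size == 0 it returns the
 * hardware maximum number of tiles in flight; otherwise it returns how many
 * tiles can be in flight when each fragment shader uses fs_common_size
 * shared registers.
 */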
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_device_info *dev_info,
   const struct pvr_device_runtime_info *dev_runtime_info,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const uint32_t available_shareds =
      dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;
   const uint32_t max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);
   uint32_t num_tile_in_flight;
   uint32_t num_allocs;

   if (fs_common_size == 0)
      return max_tiles_in_flight;

   num_allocs = pvr_get_simultaneous_num_allocs(dev_info, dev_runtime_info);

   if (fs_common_size == UINT32_MAX) {
      uint32_t max_common_size = available_shareds;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size = MIN2(max_common_size, ROGUE_MAX_PIXEL_SHARED_REGISTERS);
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   }

   num_tile_in_flight = available_shareds / (fs_common_size * 2);

   if (!PVR_HAS_ERN(dev_info, 38748))
      num_tile_in_flight -= 1;

   num_tile_in_flight /= num_allocs;

#if defined(DEBUG)
   /* Validate the above result. */

   assert(num_tile_in_flight >= MIN2(num_tile_in_flight, max_tiles_in_flight));
   num_allocs *= num_tile_in_flight;

   if (!PVR_HAS_ERN(dev_info, 38748)) {
      /* Hardware needs space for one extra shared allocation. */
      num_allocs += 1;
   }

   assert(fs_common_size <= available_shareds / (num_allocs * 2));
#endif

   return MIN2(num_tile_in_flight, max_tiles_in_flight);
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

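/* VK_EXT_memory_budget helper: the reported budget is the heap size capped
 * to what is already used plus 90% of the currently available system memory,
 * so applications are discouraged from starving the rest of the system.
 */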
static uint64_t pvr_compute_heap_budget(struct pvr_physical_device *pdevice)
{
   const uint64_t heap_size = pdevice->memory.memoryHeaps[0].size;
   const uint64_t heap_used = pdevice->heap_used;
   uint64_t sys_available = 0, heap_available;
   ASSERTED bool has_available_memory =
      os_get_available_system_memory(&sys_available);
   assert(has_available_memory);

   /* Let's not incite the app to starve the system: report at most 90% of
    * available system memory.
    */
   heap_available = sys_available * 9 / 10;
   return MIN2(heap_size, heap_used + heap_available);
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         pvr_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT: {
         VkPhysicalDeviceMemoryBudgetPropertiesEXT *pMemoryBudget =
            (VkPhysicalDeviceMemoryBudgetPropertiesEXT *)ext;

         pMemoryBudget->heapBudget[0] = pvr_compute_heap_budget(pdevice);
         pMemoryBudget->heapUsage[0] = pdevice->heap_used;

         for (uint32_t i = 1; i < VK_MAX_MEMORY_HEAPS; i++) {
            pMemoryBudget->heapBudget[i] = 0u;
            pMemoryBudget->heapUsage[i] = 0u;
         }
         break;
      }
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

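/* Builds a PDS compute shader program in three passes (PDS_GENERATE_SIZES to
 * size the buffers, then the data and code segments into one host staging
 * buffer) and uploads the result to the GPU. Note that the data size is in
 * dwords while the code size is in bytes, hence the unit conversions below.
 */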
VkResult pvr_pds_compute_shader_create_and_upload(
   struct pvr_device *device,
   struct pvr_pds_compute_shader_program *program,
   struct pvr_pds_upload *const pds_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_compute_shader(program, NULL, PDS_GENERATE_SIZES, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      PVR_DW_TO_BYTES(program->data_size) + program->code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_compute_shader(program,
                                        data_buffer,
                                        PDS_GENERATE_DATA_SEGMENT,
                                        dev_info);

   pvr_pds_compute_shader(program,
                          code_buffer,
                          PDS_GENERATE_CODE_SEGMENT,
                          dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program->data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_buffer,
                               program->code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               pds_upload_out);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_fence_program);
}

static VkResult pvr_device_init_compute_empty_program(struct pvr_device *device)
{
   struct pvr_pds_compute_shader_program program;

   pvr_pds_compute_shader_program_init(&program);
   program.clear_pds_barrier = true;

   return pvr_pds_compute_shader_create_and_upload(
      device,
      &program,
      &device->pds_compute_empty_program);
}

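/* Builds and uploads the PDS idfwdf programs: vertex shader secondary
 * attribute programs that DMA the shareds into the USC common store and kick
 * the given USC program. When the core needs a software compute PDS barrier,
 * the barrier variant is generated first (it is the larger of the two) and
 * the staging buffer is then shrunk via vk_realloc() for the plain variant.
 */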
static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    false,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the program
    * is bigger so we handle it first (if needed) and realloc() for a smaller
    * size.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         PVR_DW_TO_BYTES(program.code_size + program.data_size);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
1468                                PDS_GENERATE_CODE_SEGMENT,
1469                                dev_info);
1470    } else {
1471       *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
1472          .pvr_bo = NULL,
1473       };
1474    }
1475 
1476    /* FIXME: Figure out the define for alignment of 16. */
1477    result = pvr_gpu_upload_pds(device,
1478                                &staging_buffer[0],
1479                                program.data_size,
1480                                16,
1481                                &staging_buffer[program.data_size],
1482                                program.code_size,
1483                                16,
1484                                16,
1485                                upload_out);
1486    if (result != VK_SUCCESS) {
1487       vk_free(&device->vk.alloc, staging_buffer);
1488       pvr_bo_suballoc_free(sw_compute_barrier_upload_out->pvr_bo);
1489 
1490       return result;
1491    }
1492 
1493    vk_free(&device->vk.alloc, staging_buffer);
1494 
1495    return VK_SUCCESS;
1496 }
1497 
1498 static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
1499 {
1500    uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
1501    uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
1502    struct util_dynarray usc_program;
1503    struct pvr_texture_state_info tex_info;
1504    uint32_t *dword_ptr;
1505    uint32_t usc_shareds;
1506    uint32_t usc_temps;
1507    VkResult result;
1508 
1509    util_dynarray_init(&usc_program, NULL);
1510    pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
1511                                     &usc_program,
1512                                     &usc_shareds,
1513                                     &usc_temps);
1514 
1515    device->idfwdf_state.usc_shareds = usc_shareds;
1516 
1517    /* FIXME: Figure out the define for alignment of 16. */
1518    result = pvr_gpu_upload_usc(device,
1519                                usc_program.data,
1520                                usc_program.size,
1521                                16,
1522                                &device->idfwdf_state.usc);
1523    util_dynarray_fini(&usc_program);
1524 
1525    if (result != VK_SUCCESS)
1526       return result;
1527 
1528    /* TODO: Get the store buffer size from the compiler? */
1529    /* TODO: How was the size derived here? */
1530    result = pvr_bo_alloc(device,
1531                          device->heaps.general_heap,
1532                          4 * sizeof(float) * 4 * 2,
1533                          4,
1534                          0,
1535                          &device->idfwdf_state.store_bo);
1536    if (result != VK_SUCCESS)
1537       goto err_free_usc_program;
1538 
1539    result = pvr_bo_alloc(device,
1540                          device->heaps.general_heap,
1541                          usc_shareds * ROGUE_REG_SIZE_BYTES,
1542                          ROGUE_REG_SIZE_BYTES,
1543                          PVR_BO_ALLOC_FLAG_CPU_MAPPED,
1544                          &device->idfwdf_state.shareds_bo);
1545    if (result != VK_SUCCESS)
1546       goto err_free_store_buffer;
1547 
1548    /* Pack state words. */
1549 
1550    pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
1551       sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
1552       sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
1553       sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1554       sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1555    }
1556 
1557    /* clang-format off */
1558    pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
1559    /* clang-format on */
1560 
1561    STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);
1562 
1563    tex_info = (struct pvr_texture_state_info){
1564       .format = VK_FORMAT_R32G32B32A32_SFLOAT,
1565       .mem_layout = PVR_MEMLAYOUT_LINEAR,
1566       .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
1567       .type = VK_IMAGE_VIEW_TYPE_2D,
1568       .extent = { .width = 4, .height = 2, .depth = 0 },
1569       .mip_levels = 1,
1570       .sample_count = 1,
1571       .stride = 4,
1572       .swizzle = { PIPE_SWIZZLE_X,
1573                    PIPE_SWIZZLE_Y,
1574                    PIPE_SWIZZLE_Z,
1575                    PIPE_SWIZZLE_W },
1576       .addr = device->idfwdf_state.store_bo->vma->dev_addr,
1577    };
1578 
1579    result = pvr_pack_tex_state(device, &tex_info, image_state);
1580    if (result != VK_SUCCESS)
1581       goto err_free_shareds_buffer;
1582 
1583    /* Fill the shareds buffer. */
1584 
1585    dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;
1586 
1587 #define HIGH_32(val) ((uint32_t)((val) >> 32U))
1588 #define LOW_32(val) ((uint32_t)(val))
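/* Illustrative example (values not from the original source): for
 * val == 0x1122334455667788, HIGH_32(val) == 0x11223344 and
 * LOW_32(val) == 0x55667788.
 */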
1589 
1590    /* TODO: Should we use compiler info to set up the shareds data instead of
1591     * assuming there are always 12 and that this is how they should be set up?
1592     */
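   /* Layout of the 12 shareds written below (summarized from the code that
    * follows, for readability):
    *   dwords  0-1 : store buffer device address (high word, then low word)
    *   dwords  2-3 : padding for 128-bit alignment of the state words
    *   dwords  4-7 : image state words 0 and 1 (low word, then high word)
    *   dwords 8-11 : sampler state words 0 and 1 (low word, then high word)
    */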
1593 
1594    dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1595    dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
1596 
1597    /* Pad the shareds as the texture/sampler state words are 128-bit aligned. */
1598    dword_ptr[2] = 0U;
1599    dword_ptr[3] = 0U;
1600 
1601    dword_ptr[4] = LOW_32(image_state[0]);
1602    dword_ptr[5] = HIGH_32(image_state[0]);
1603    dword_ptr[6] = LOW_32(image_state[1]);
1604    dword_ptr[7] = HIGH_32(image_state[1]);
1605 
1606    dword_ptr[8] = LOW_32(sampler_state[0]);
1607    dword_ptr[9] = HIGH_32(sampler_state[0]);
1608    dword_ptr[10] = LOW_32(sampler_state[1]);
1609    dword_ptr[11] = HIGH_32(sampler_state[1]);
1610    assert(11 + 1 == usc_shareds);
1611 
1612 #undef HIGH_32
1613 #undef LOW_32
1614 
1615    pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
1616    dword_ptr = NULL;
1617 
1618    /* Generate and upload PDS programs. */
1619    result = pvr_pds_idfwdf_programs_create_and_upload(
1620       device,
1621       device->idfwdf_state.usc->dev_addr,
1622       usc_shareds,
1623       usc_temps,
1624       device->idfwdf_state.shareds_bo->vma->dev_addr,
1625       &device->idfwdf_state.pds,
1626       &device->idfwdf_state.sw_compute_barrier_pds);
1627    if (result != VK_SUCCESS)
1628       goto err_free_shareds_buffer;
1629 
1630    return VK_SUCCESS;
1631 
1632 err_free_shareds_buffer:
1633    pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1634 
1635 err_free_store_buffer:
1636    pvr_bo_free(device, device->idfwdf_state.store_bo);
1637 
1638 err_free_usc_program:
1639    pvr_bo_suballoc_free(device->idfwdf_state.usc);
1640 
1641    return result;
1642 }
1643 
1644 static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
1645 {
1646    pvr_bo_suballoc_free(device->idfwdf_state.pds.pvr_bo);
1647    pvr_bo_suballoc_free(device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
1648    pvr_bo_free(device, device->idfwdf_state.shareds_bo);
1649    pvr_bo_free(device, device->idfwdf_state.store_bo);
1650    pvr_bo_suballoc_free(device->idfwdf_state.usc);
1651 }
1652 
1653 /* FIXME: We should be calculating the size when we upload the code in
1654  * pvr_srv_setup_static_pixel_event_program().
1655  */
1656 static void pvr_device_get_pixel_event_pds_program_data_size(
1657    const struct pvr_device_info *dev_info,
1658    uint32_t *const data_size_in_dwords_out)
1659 {
1660    struct pvr_pds_event_program program = {
1661       /* No data to DMA, just a DOUTU needed. */
1662       .num_emit_word_pairs = 0,
1663    };
1664 
1665    pvr_pds_set_sizes_pixel_event(&program, dev_info);
1666 
1667    *data_size_in_dwords_out = program.data_size;
1668 }
1669 
1670 static VkResult pvr_device_init_nop_program(struct pvr_device *device)
1671 {
1672    const uint32_t cache_line_size =
1673       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1674    struct pvr_pds_kickusc_program program = { 0 };
1675    struct util_dynarray nop_usc_bin;
1676    uint32_t staging_buffer_size;
1677    uint32_t *staging_buffer;
1678    VkResult result;
1679 
1680    pvr_uscgen_nop(&nop_usc_bin);
1681 
1682    result = pvr_gpu_upload_usc(device,
1683                                util_dynarray_begin(&nop_usc_bin),
1684                                nop_usc_bin.size,
1685                                cache_line_size,
1686                                &device->nop_program.usc);
1687    util_dynarray_fini(&nop_usc_bin);
1688    if (result != VK_SUCCESS)
1689       return result;
1690 
1691    /* Setup a PDS program that kicks the static USC program. */
1692    pvr_pds_setup_doutu(&program.usc_task_control,
1693                        device->nop_program.usc->dev_addr.addr,
1694                        0U,
1695                        PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
1696                        false);
1697 
1698    pvr_pds_set_sizes_pixel_shader(&program);
1699 
1700    staging_buffer_size = PVR_DW_TO_BYTES(program.code_size + program.data_size);
1701 
1702    staging_buffer = vk_alloc(&device->vk.alloc,
1703                              staging_buffer_size,
1704                              8U,
1705                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1706    if (!staging_buffer) {
1707       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
1708       goto err_free_nop_usc_bo;
1709    }
1710 
1711    pvr_pds_generate_pixel_shader_program(&program, staging_buffer);
1712 
1713    /* FIXME: Figure out the define for alignment of 16. */
1714    result = pvr_gpu_upload_pds(device,
1715                                staging_buffer,
1716                                program.data_size,
1717                                16U,
1718                                &staging_buffer[program.data_size],
1719                                program.code_size,
1720                                16U,
1721                                16U,
1722                                &device->nop_program.pds);
1723    if (result != VK_SUCCESS)
1724       goto err_free_staging_buffer;
1725 
1726    vk_free(&device->vk.alloc, staging_buffer);
1727 
1728    return VK_SUCCESS;
1729 
1730 err_free_staging_buffer:
1731    vk_free(&device->vk.alloc, staging_buffer);
1732 
1733 err_free_nop_usc_bo:
1734    pvr_bo_suballoc_free(device->nop_program.usc);
1735 
1736    return result;
1737 }
1738 
1739 static void pvr_device_init_tile_buffer_state(struct pvr_device *device)
1740 {
1741    simple_mtx_init(&device->tile_buffer_state.mtx, mtx_plain);
1742 
1743    for (uint32_t i = 0; i < ARRAY_SIZE(device->tile_buffer_state.buffers); i++)
1744       device->tile_buffer_state.buffers[i] = NULL;
1745 
1746    device->tile_buffer_state.buffer_count = 0;
1747 }
1748 
1749 static void pvr_device_finish_tile_buffer_state(struct pvr_device *device)
1750 {
1751    /* Destroy the mutex first to trigger asserts in case it's still locked,
1752     * so that we don't put things in an inconsistent state by freeing buffers
1753     * that might still be in use, or by freeing buffers while new ones are
1754     * being allocated.
1755     */
1756    simple_mtx_destroy(&device->tile_buffer_state.mtx);
1757 
1758    for (uint32_t i = 0; i < device->tile_buffer_state.buffer_count; i++)
1759       pvr_bo_free(device, device->tile_buffer_state.buffers[i]);
1760 }
1761 
1762 /**
1763  * \brief Ensures that a certain number of tile buffers are allocated.
1764  *
1765  * Makes sure that \p capacity tile buffers are allocated. If fewer are
1766  * present, appends new tile buffers of \p size_in_bytes each to reach the quota.
1767  */
1768 VkResult pvr_device_tile_buffer_ensure_cap(struct pvr_device *device,
1769                                            uint32_t capacity,
1770                                            uint32_t size_in_bytes)
1771 {
1772    struct pvr_device_tile_buffer_state *tile_buffer_state =
1773       &device->tile_buffer_state;
1774    const uint32_t cache_line_size =
1775       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
1776    VkResult result;
1777 
1778    simple_mtx_lock(&tile_buffer_state->mtx);
1779 
1780    /* Clamping in release and asserting in debug. */
1781    assert(capacity <= ARRAY_SIZE(tile_buffer_state->buffers));
1782    capacity = CLAMP(capacity,
1783                     tile_buffer_state->buffer_count,
1784                     ARRAY_SIZE(tile_buffer_state->buffers));
1785 
1786    /* TODO: Implement bo multialloc to reduce the number of syscalls and
1787     * allocations?
1788     */
1789    for (uint32_t i = tile_buffer_state->buffer_count; i < capacity; i++) {
1790       result = pvr_bo_alloc(device,
1791                             device->heaps.general_heap,
1792                             size_in_bytes,
1793                             cache_line_size,
1794                             0,
1795                             &tile_buffer_state->buffers[i]);
1796       if (result != VK_SUCCESS) {
1797          for (uint32_t j = tile_buffer_state->buffer_count; j < i; j++)
1798             pvr_bo_free(device, tile_buffer_state->buffers[j]);
1799 
1800          goto err_release_lock;
1801       }
1802    }
1803 
1804    tile_buffer_state->buffer_count = capacity;
1805 
1806    simple_mtx_unlock(&tile_buffer_state->mtx);
1807 
1808    return VK_SUCCESS;
1809 
1810 err_release_lock:
1811    simple_mtx_unlock(&tile_buffer_state->mtx);
1812 
1813    return result;
1814 }
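/* A minimal usage sketch (illustrative only, not from the original source): a
 * caller that needs two 16 KiB tile buffers before kicking a job could do:
 *
 *    result = pvr_device_tile_buffer_ensure_cap(device, 2, 16 * 1024);
 *    if (result != VK_SUCCESS)
 *       return result;
 *
 * Already-allocated buffers are kept, so repeated calls only ever grow the
 * pool, up to ARRAY_SIZE(tile_buffer_state->buffers) entries.
 */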
1815 
1816 static void pvr_device_init_default_sampler_state(struct pvr_device *device)
1817 {
1818    pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
1819       sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1820       sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1821       sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
1822       sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
1823       sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
1824       sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
1825       sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
1826       sampler.non_normalized_coords = true;
1827    }
1828 }
1829 
1830 VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
1831                           const VkDeviceCreateInfo *pCreateInfo,
1832                           const VkAllocationCallbacks *pAllocator,
1833                           VkDevice *pDevice)
1834 {
1835    PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
1836    uint32_t initial_free_list_size = PVR_GLOBAL_FREE_LIST_INITIAL_SIZE;
1837    struct pvr_instance *instance = pdevice->instance;
1838    struct vk_device_dispatch_table dispatch_table;
1839    struct pvr_device *device;
1840    struct pvr_winsys *ws;
1841    VkResult result;
1842 
1843    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
1844 
1845    result = pvr_winsys_create(pdevice->render_path,
1846                               pdevice->display_path,
1847                               pAllocator ? pAllocator : &instance->vk.alloc,
1848                               &ws);
1849    if (result != VK_SUCCESS)
1850       goto err_out;
1851 
1852    device = vk_alloc2(&instance->vk.alloc,
1853                       pAllocator,
1854                       sizeof(*device),
1855                       8,
1856                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
1857    if (!device) {
1858       result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
1859       goto err_pvr_winsys_destroy;
1860    }
1861 
1862    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1863                                              &pvr_device_entrypoints,
1864                                              true);
1865 
1866    vk_device_dispatch_table_from_entrypoints(&dispatch_table,
1867                                              &wsi_device_entrypoints,
1868                                              false);
1869 
1870    result = vk_device_init(&device->vk,
1871                            &pdevice->vk,
1872                            &dispatch_table,
1873                            pCreateInfo,
1874                            pAllocator);
1875    if (result != VK_SUCCESS)
1876       goto err_free_device;
1877 
1878    device->instance = instance;
1879    device->pdevice = pdevice;
1880    device->ws = ws;
1881 
1882    vk_device_set_drm_fd(&device->vk, ws->render_fd);
1883 
1884    if (ws->features.supports_threaded_submit) {
1885       /* Queue submission can be blocked if the kernel CCBs become full,
1886        * so enable threaded submit to not block the submitter.
1887        */
1888       vk_device_enable_threaded_submit(&device->vk);
1889    }
1890 
1891    ws->ops->get_heaps_info(ws, &device->heaps);
1892 
1893    result = pvr_bo_store_create(device);
1894    if (result != VK_SUCCESS)
1895       goto err_vk_device_finish;
1896 
1897    pvr_bo_suballocator_init(&device->suballoc_general,
1898                             device->heaps.general_heap,
1899                             device,
1900                             PVR_SUBALLOCATOR_GENERAL_SIZE);
1901    pvr_bo_suballocator_init(&device->suballoc_pds,
1902                             device->heaps.pds_heap,
1903                             device,
1904                             PVR_SUBALLOCATOR_PDS_SIZE);
1905    pvr_bo_suballocator_init(&device->suballoc_transfer,
1906                             device->heaps.transfer_frag_heap,
1907                             device,
1908                             PVR_SUBALLOCATOR_TRANSFER_SIZE);
1909    pvr_bo_suballocator_init(&device->suballoc_usc,
1910                             device->heaps.usc_heap,
1911                             device,
1912                             PVR_SUBALLOCATOR_USC_SIZE);
1913    pvr_bo_suballocator_init(&device->suballoc_vis_test,
1914                             device->heaps.vis_test_heap,
1915                             device,
1916                             PVR_SUBALLOCATOR_VIS_TEST_SIZE);
1917 
1918    if (p_atomic_inc_return(&instance->active_device_count) >
1919        PVR_SECONDARY_DEVICE_THRESHOLD) {
1920       initial_free_list_size = PVR_SECONDARY_DEVICE_FREE_LIST_INITAL_SIZE;
1921    }
1922 
1923    result = pvr_free_list_create(device,
1924                                  initial_free_list_size,
1925                                  PVR_GLOBAL_FREE_LIST_MAX_SIZE,
1926                                  PVR_GLOBAL_FREE_LIST_GROW_SIZE,
1927                                  PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
1928                                  NULL /* parent_free_list */,
1929                                  &device->global_free_list);
1930    if (result != VK_SUCCESS)
1931       goto err_dec_device_count;
1932 
1933    result = pvr_device_init_nop_program(device);
1934    if (result != VK_SUCCESS)
1935       goto err_pvr_free_list_destroy;
1936 
1937    result = pvr_device_init_compute_fence_program(device);
1938    if (result != VK_SUCCESS)
1939       goto err_pvr_free_nop_program;
1940 
1941    result = pvr_device_init_compute_empty_program(device);
1942    if (result != VK_SUCCESS)
1943       goto err_pvr_free_compute_fence;
1944 
1945    result = pvr_device_create_compute_query_programs(device);
1946    if (result != VK_SUCCESS)
1947       goto err_pvr_free_compute_empty;
1948 
1949    result = pvr_device_init_compute_idfwdf_state(device);
1950    if (result != VK_SUCCESS)
1951       goto err_pvr_destroy_compute_query_programs;
1952 
1953    result = pvr_device_init_graphics_static_clear_state(device);
1954    if (result != VK_SUCCESS)
1955       goto err_pvr_finish_compute_idfwdf;
1956 
1957    result = pvr_device_init_spm_load_state(device);
1958    if (result != VK_SUCCESS)
1959       goto err_pvr_finish_graphics_static_clear_state;
1960 
1961    pvr_device_init_tile_buffer_state(device);
1962 
1963    result = pvr_queues_create(device, pCreateInfo);
1964    if (result != VK_SUCCESS)
1965       goto err_pvr_finish_tile_buffer_state;
1966 
1967    pvr_device_init_default_sampler_state(device);
1968 
1969    pvr_spm_init_scratch_buffer_store(device);
1970 
1971    result = pvr_init_robustness_buffer(device);
1972    if (result != VK_SUCCESS)
1973       goto err_pvr_spm_finish_scratch_buffer_store;
1974 
1975    result = pvr_border_color_table_init(&device->border_color_table, device);
1976    if (result != VK_SUCCESS)
1977       goto err_pvr_robustness_buffer_finish;
1978 
1979    /* FIXME: Move this to a later stage and possibly somewhere other than
1980     * pvr_device. The purpose of this is so that we don't have to get the size
1981     * on each kick.
1982     */
1983    pvr_device_get_pixel_event_pds_program_data_size(
1984       &pdevice->dev_info,
1985       &device->pixel_event_data_size_in_dwords);
1986 
1987    device->global_cmd_buffer_submit_count = 0;
1988    device->global_queue_present_count = 0;
1989 
1990    *pDevice = pvr_device_to_handle(device);
1991 
1992    return VK_SUCCESS;
1993 
1994 err_pvr_robustness_buffer_finish:
1995    pvr_robustness_buffer_finish(device);
1996 
1997 err_pvr_spm_finish_scratch_buffer_store:
1998    pvr_spm_finish_scratch_buffer_store(device);
1999 
2000    pvr_queues_destroy(device);
2001 
2002 err_pvr_finish_tile_buffer_state:
2003    pvr_device_finish_tile_buffer_state(device);
2004    pvr_device_finish_spm_load_state(device);
2005 
2006 err_pvr_finish_graphics_static_clear_state:
2007    pvr_device_finish_graphics_static_clear_state(device);
2008 
2009 err_pvr_finish_compute_idfwdf:
2010    pvr_device_finish_compute_idfwdf_state(device);
2011 
2012 err_pvr_destroy_compute_query_programs:
2013    pvr_device_destroy_compute_query_programs(device);
2014 
2015 err_pvr_free_compute_empty:
2016    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2017 
2018 err_pvr_free_compute_fence:
2019    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2020 
2021 err_pvr_free_nop_program:
2022    pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2023    pvr_bo_suballoc_free(device->nop_program.usc);
2024 
2025 err_pvr_free_list_destroy:
2026    pvr_free_list_destroy(device->global_free_list);
2027 
2028 err_dec_device_count:
2029    p_atomic_dec(&device->instance->active_device_count);
2030 
2031    pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2032    pvr_bo_suballocator_fini(&device->suballoc_usc);
2033    pvr_bo_suballocator_fini(&device->suballoc_transfer);
2034    pvr_bo_suballocator_fini(&device->suballoc_pds);
2035    pvr_bo_suballocator_fini(&device->suballoc_general);
2036 
2037    pvr_bo_store_destroy(device);
2038 
2039 err_vk_device_finish:
2040    vk_device_finish(&device->vk);
2041 
2042 err_free_device:
2043    vk_free(&device->vk.alloc, device);
2044 
2045 err_pvr_winsys_destroy:
2046    pvr_winsys_destroy(ws);
2047 
2048 err_out:
2049    return result;
2050 }
2051 
2052 void pvr_DestroyDevice(VkDevice _device,
2053                        const VkAllocationCallbacks *pAllocator)
2054 {
2055    PVR_FROM_HANDLE(pvr_device, device, _device);
2056 
2057    if (!device)
2058       return;
2059 
2060    pvr_border_color_table_finish(&device->border_color_table, device);
2061    pvr_robustness_buffer_finish(device);
2062    pvr_spm_finish_scratch_buffer_store(device);
2063    pvr_queues_destroy(device);
2064    pvr_device_finish_tile_buffer_state(device);
2065    pvr_device_finish_spm_load_state(device);
2066    pvr_device_finish_graphics_static_clear_state(device);
2067    pvr_device_finish_compute_idfwdf_state(device);
2068    pvr_device_destroy_compute_query_programs(device);
2069    pvr_bo_suballoc_free(device->pds_compute_empty_program.pvr_bo);
2070    pvr_bo_suballoc_free(device->pds_compute_fence_program.pvr_bo);
2071    pvr_bo_suballoc_free(device->nop_program.pds.pvr_bo);
2072    pvr_bo_suballoc_free(device->nop_program.usc);
2073    pvr_free_list_destroy(device->global_free_list);
2074    pvr_bo_suballocator_fini(&device->suballoc_vis_test);
2075    pvr_bo_suballocator_fini(&device->suballoc_usc);
2076    pvr_bo_suballocator_fini(&device->suballoc_transfer);
2077    pvr_bo_suballocator_fini(&device->suballoc_pds);
2078    pvr_bo_suballocator_fini(&device->suballoc_general);
2079    pvr_bo_store_destroy(device);
2080    pvr_winsys_destroy(device->ws);
2081    p_atomic_dec(&device->instance->active_device_count);
2082    vk_device_finish(&device->vk);
2083    vk_free(&device->vk.alloc, device);
2084 }
2085 
2086 VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
2087                                               VkLayerProperties *pProperties)
2088 {
2089    if (!pProperties) {
2090       *pPropertyCount = 0;
2091       return VK_SUCCESS;
2092    }
2093 
2094    return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
2095 }
2096 
2097 static void free_memory(struct pvr_device *device,
2098                         struct pvr_device_memory *mem,
2099                         const VkAllocationCallbacks *pAllocator)
2100 {
2101    if (!mem)
2102       return;
2103 
2104    /* From the Vulkan spec (§11.2.13. Freeing Device Memory):
2105     *   If a memory object is mapped at the time it is freed, it is implicitly
2106     *   unmapped.
2107     */
2108    if (mem->bo->map)
2109       device->ws->ops->buffer_unmap(mem->bo);
2110 
2111    p_atomic_add(&device->pdevice->heap_used, -mem->bo->size);
2112 
2113    device->ws->ops->buffer_destroy(mem->bo);
2114 
2115    vk_object_free(&device->vk, pAllocator, mem);
2116 }
2117 
2118 VkResult pvr_AllocateMemory(VkDevice _device,
2119                             const VkMemoryAllocateInfo *pAllocateInfo,
2120                             const VkAllocationCallbacks *pAllocator,
2121                             VkDeviceMemory *pMem)
2122 {
2123    const VkImportMemoryFdInfoKHR *fd_info = NULL;
2124    PVR_FROM_HANDLE(pvr_device, device, _device);
2125    enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
2126    struct pvr_device_memory *mem;
2127    uint64_t heap_used;
2128    VkResult result;
2129 
2130    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
2131    assert(pAllocateInfo->allocationSize > 0);
2132 
2133    mem = vk_object_alloc(&device->vk,
2134                          pAllocator,
2135                          sizeof(*mem),
2136                          VK_OBJECT_TYPE_DEVICE_MEMORY);
2137    if (!mem)
2138       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2139 
2140    vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
2141       switch ((unsigned)ext->sType) {
2142       case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
2143          if (device->ws->display_fd >= 0)
2144             type = PVR_WINSYS_BO_TYPE_DISPLAY;
2145          break;
2146       case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
2147          fd_info = (void *)ext;
2148          break;
2149       default:
2150          pvr_debug_ignored_stype(ext->sType);
2151          break;
2152       }
2153    }
2154 
2155    if (fd_info && fd_info->handleType) {
2156       VkDeviceSize aligned_alloc_size =
2157          ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);
2158 
2159       assert(
2160          fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2161          fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2162 
2163       result = device->ws->ops->buffer_create_from_fd(device->ws,
2164                                                       fd_info->fd,
2165                                                       &mem->bo);
2166       if (result != VK_SUCCESS)
2167          goto err_vk_object_free_mem;
2168 
2169       /* For security purposes, we reject importing the bo if it's smaller
2170        * than the requested allocation size. This prevents a malicious client
2171        * from passing a buffer to a trusted client, lying about the size, and
2172        * telling the trusted client to try and texture from an image that goes
2173        * out-of-bounds. This sort of thing could lead to GPU hangs or worse
2174        * in the trusted client. The trusted client can protect itself against
2175        * this sort of attack but only if it can trust the buffer size.
2176        */
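      /* E.g. (illustrative numbers only): allocationSize = 5000 with a
       * 4096 B page size gives aligned_alloc_size = 8192, so an imported bo
       * of only 4096 B is rejected below rather than risking out-of-bounds
       * GPU accesses.
       */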
2177       if (aligned_alloc_size > mem->bo->size) {
2178          result = vk_errorf(device,
2179                             VK_ERROR_INVALID_EXTERNAL_HANDLE,
2180                             "Aligned requested size too large for the given fd "
2181                             "%" PRIu64 "B > %" PRIu64 "B",
2182                             pAllocateInfo->allocationSize,
2183                             mem->bo->size);
2184          device->ws->ops->buffer_destroy(mem->bo);
2185          goto err_vk_object_free_mem;
2186       }
2187 
2188       /* From the Vulkan spec:
2189        *
2190        *    "Importing memory from a file descriptor transfers ownership of
2191        *    the file descriptor from the application to the Vulkan
2192        *    implementation. The application must not perform any operations on
2193        *    the file descriptor after a successful import."
2194        *
2195        * If the import fails, we leave the file descriptor open.
2196        */
2197       close(fd_info->fd);
2198    } else {
2199       /* Align physical allocations to the page size of the heap that will be
2200        * used when binding device memory (see pvr_bind_memory()) to ensure the
2201        * entire allocation can be mapped.
2202        */
2203       const uint64_t alignment = device->heaps.general_heap->page_size;
2204 
2205       /* FIXME: Need to determine the flags based on
2206        * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
2207        *
2208        * The alternative would be to store the flags alongside the memory
2209        * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
2210        * that they can be looked up.
2211        */
2212       result = device->ws->ops->buffer_create(device->ws,
2213                                               pAllocateInfo->allocationSize,
2214                                               alignment,
2215                                               type,
2216                                               PVR_WINSYS_BO_FLAG_CPU_ACCESS,
2217                                               &mem->bo);
2218       if (result != VK_SUCCESS)
2219          goto err_vk_object_free_mem;
2220    }
2221 
2222    heap_used = p_atomic_add_return(&device->pdevice->heap_used, mem->bo->size);
2223    if (heap_used > device->pdevice->memory.memoryHeaps[0].size) {
2224       free_memory(device, mem, pAllocator);
2225       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2226    }
2227 
2228    *pMem = pvr_device_memory_to_handle(mem);
2229 
2230    return VK_SUCCESS;
2231 
2232 err_vk_object_free_mem:
2233    vk_object_free(&device->vk, pAllocator, mem);
2234 
2235    return result;
2236 }
2237 
2238 VkResult pvr_GetMemoryFdKHR(VkDevice _device,
2239                             const VkMemoryGetFdInfoKHR *pGetFdInfo,
2240                             int *pFd)
2241 {
2242    PVR_FROM_HANDLE(pvr_device, device, _device);
2243    PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);
2244 
2245    assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);
2246 
2247    assert(
2248       pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
2249       pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
2250 
2251    return device->ws->ops->buffer_get_fd(mem->bo, pFd);
2252 }
2253 
2254 VkResult
2255 pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
2256                              VkExternalMemoryHandleTypeFlagBits handleType,
2257                              int fd,
2258                              VkMemoryFdPropertiesKHR *pMemoryFdProperties)
2259 {
2260    PVR_FROM_HANDLE(pvr_device, device, _device);
2261 
2262    switch (handleType) {
2263    case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
2264       /* FIXME: This should only allow memory types having
2265        * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as
2266        * dma-buf should be imported using cacheable memory types,
2267        * given exporter's mmap will always map it as cacheable.
2268        * Ref:
2269        * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
2270        */
2271       pMemoryFdProperties->memoryTypeBits =
2272          (1 << device->pdevice->memory.memoryTypeCount) - 1;
2273       return VK_SUCCESS;
2274    default:
2275       return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
2276    }
2277 }
2278 
2279 void pvr_FreeMemory(VkDevice _device,
2280                     VkDeviceMemory _mem,
2281                     const VkAllocationCallbacks *pAllocator)
2282 {
2283    PVR_FROM_HANDLE(pvr_device, device, _device);
2284    PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);
2285 
2286    free_memory(device, mem, pAllocator);
2287 }
2288 
2289 VkResult pvr_MapMemory(VkDevice _device,
2290                        VkDeviceMemory _memory,
2291                        VkDeviceSize offset,
2292                        VkDeviceSize size,
2293                        VkMemoryMapFlags flags,
2294                        void **ppData)
2295 {
2296    PVR_FROM_HANDLE(pvr_device, device, _device);
2297    PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2298    VkResult result;
2299 
2300    if (!mem) {
2301       *ppData = NULL;
2302       return VK_SUCCESS;
2303    }
2304 
2305    if (size == VK_WHOLE_SIZE)
2306       size = mem->bo->size - offset;
2307 
2308    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
2309     *
2310     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
2311     *    (checked by the assert below)
2312     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
2313     *    equal to the size of the memory minus offset
2314     */
2315 
2316    assert(size > 0);
2317    assert(offset + size <= mem->bo->size);
2318 
2319    /* Check if already mapped */
2320    if (mem->bo->map) {
2321       *ppData = (uint8_t *)mem->bo->map + offset;
2322       return VK_SUCCESS;
2323    }
2324 
2325    /* Map it all at once */
2326    result = device->ws->ops->buffer_map(mem->bo);
2327    if (result != VK_SUCCESS)
2328       return result;
2329 
2330    *ppData = (uint8_t *)mem->bo->map + offset;
2331 
2332    return VK_SUCCESS;
2333 }
2334 
2335 void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
2336 {
2337    PVR_FROM_HANDLE(pvr_device, device, _device);
2338    PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
2339 
2340    if (!mem || !mem->bo->map)
2341       return;
2342 
2343    device->ws->ops->buffer_unmap(mem->bo);
2344 }
2345 
2346 VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
2347                                      uint32_t memoryRangeCount,
2348                                      const VkMappedMemoryRange *pMemoryRanges)
2349 {
2350    return VK_SUCCESS;
2351 }
2352 
2353 VkResult
2354 pvr_InvalidateMappedMemoryRanges(VkDevice _device,
2355                                  uint32_t memoryRangeCount,
2356                                  const VkMappedMemoryRange *pMemoryRanges)
2357 {
2358    return VK_SUCCESS;
2359 }
2360 
2361 void pvr_GetImageSparseMemoryRequirements2(
2362    VkDevice device,
2363    const VkImageSparseMemoryRequirementsInfo2 *pInfo,
2364    uint32_t *pSparseMemoryRequirementCount,
2365    VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
2366 {
2367    *pSparseMemoryRequirementCount = 0;
2368 }
2369 
2370 void pvr_GetDeviceMemoryCommitment(VkDevice device,
2371                                    VkDeviceMemory memory,
2372                                    VkDeviceSize *pCommittedMemoryInBytes)
2373 {
2374    *pCommittedMemoryInBytes = 0;
2375 }
2376 
2377 VkResult pvr_bind_memory(struct pvr_device *device,
2378                          struct pvr_device_memory *mem,
2379                          VkDeviceSize offset,
2380                          VkDeviceSize size,
2381                          VkDeviceSize alignment,
2382                          struct pvr_winsys_vma **const vma_out,
2383                          pvr_dev_addr_t *const dev_addr_out)
2384 {
2385    VkDeviceSize virt_size =
2386       size + (offset & (device->heaps.general_heap->page_size - 1));
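   /* Illustrative example (assumed values): with a 4 KiB heap page size and
    * offset == 4100, the low bits (4100 & 4095) == 4 are added to size so
    * that the VMA reservation also covers the page-misaligned start of the
    * mapping.
    */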
2387    struct pvr_winsys_vma *vma;
2388    pvr_dev_addr_t dev_addr;
2389    VkResult result;
2390 
2391    /* Valid usage:
2392     *
2393     *   "memoryOffset must be an integer multiple of the alignment member of
2394     *    the VkMemoryRequirements structure returned from a call to
2395     *    vkGetBufferMemoryRequirements with buffer"
2396     *
2397     *   "memoryOffset must be an integer multiple of the alignment member of
2398     *    the VkMemoryRequirements structure returned from a call to
2399     *    vkGetImageMemoryRequirements with image"
2400     */
2401    assert(offset % alignment == 0);
2402    assert(offset < mem->bo->size);
2403 
2404    result = device->ws->ops->heap_alloc(device->heaps.general_heap,
2405                                         virt_size,
2406                                         alignment,
2407                                         &vma);
2408    if (result != VK_SUCCESS)
2409       goto err_out;
2410 
2411    result = device->ws->ops->vma_map(vma, mem->bo, offset, size, &dev_addr);
2412    if (result != VK_SUCCESS)
2413       goto err_free_vma;
2414 
2415    *dev_addr_out = dev_addr;
2416    *vma_out = vma;
2417 
2418    return VK_SUCCESS;
2419 
2420 err_free_vma:
2421    device->ws->ops->heap_free(vma);
2422 
2423 err_out:
2424    return result;
2425 }
2426 
2427 void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
2428 {
2429    device->ws->ops->vma_unmap(vma);
2430    device->ws->ops->heap_free(vma);
2431 }
2432 
2433 VkResult pvr_BindBufferMemory2(VkDevice _device,
2434                                uint32_t bindInfoCount,
2435                                const VkBindBufferMemoryInfo *pBindInfos)
2436 {
2437    PVR_FROM_HANDLE(pvr_device, device, _device);
2438    uint32_t i;
2439 
2440    for (i = 0; i < bindInfoCount; i++) {
2441       PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
2442       PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2443 
2444       VkResult result = pvr_bind_memory(device,
2445                                         mem,
2446                                         pBindInfos[i].memoryOffset,
2447                                         buffer->vk.size,
2448                                         buffer->alignment,
2449                                         &buffer->vma,
2450                                         &buffer->dev_addr);
2451       if (result != VK_SUCCESS) {
2452          while (i--) {
2453             PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
2454             pvr_unbind_memory(device, buffer->vma);
2455          }
2456 
2457          return result;
2458       }
2459    }
2460 
2461    return VK_SUCCESS;
2462 }
2463 
2464 VkResult pvr_QueueBindSparse(VkQueue _queue,
2465                              uint32_t bindInfoCount,
2466                              const VkBindSparseInfo *pBindInfo,
2467                              VkFence fence)
2468 {
2469    return VK_SUCCESS;
2470 }
2471 
2472 /* Event functions. */
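/* Overview (summarizing the state machine implemented below): an event is
 * either set/reset directly by the host (PVR_EVENT_STATE_*_BY_HOST), in which
 * case its state field alone is authoritative, or set/reset by the device
 * (PVR_EVENT_STATE_*_BY_DEVICE), in which case a vk_sync object is consulted
 * to find out whether the GPU-side operation has completed yet.
 */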
2473 
2474 VkResult pvr_CreateEvent(VkDevice _device,
2475                          const VkEventCreateInfo *pCreateInfo,
2476                          const VkAllocationCallbacks *pAllocator,
2477                          VkEvent *pEvent)
2478 {
2479    PVR_FROM_HANDLE(pvr_device, device, _device);
2480 
2481    struct pvr_event *event = vk_object_alloc(&device->vk,
2482                                              pAllocator,
2483                                              sizeof(*event),
2484                                              VK_OBJECT_TYPE_EVENT);
2485    if (!event)
2486       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2487 
2488    event->sync = NULL;
2489    event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2490 
2491    *pEvent = pvr_event_to_handle(event);
2492 
2493    return VK_SUCCESS;
2494 }
2495 
2496 void pvr_DestroyEvent(VkDevice _device,
2497                       VkEvent _event,
2498                       const VkAllocationCallbacks *pAllocator)
2499 {
2500    PVR_FROM_HANDLE(pvr_device, device, _device);
2501    PVR_FROM_HANDLE(pvr_event, event, _event);
2502 
2503    if (!event)
2504       return;
2505 
2506    if (event->sync)
2507       vk_sync_destroy(&device->vk, event->sync);
2508 
2509    vk_object_free(&device->vk, pAllocator, event);
2510 }
2511 
2512 VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
2513 {
2514    PVR_FROM_HANDLE(pvr_device, device, _device);
2515    PVR_FROM_HANDLE(pvr_event, event, _event);
2516    VkResult result;
2517 
2518    switch (event->state) {
2519    case PVR_EVENT_STATE_SET_BY_DEVICE:
2520       if (!event->sync)
2521          return VK_EVENT_RESET;
2522 
2523       result =
2524          vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2525       result = (result == VK_SUCCESS) ? VK_EVENT_SET : VK_EVENT_RESET;
2526       break;
2527 
2528    case PVR_EVENT_STATE_RESET_BY_DEVICE:
2529       if (!event->sync)
2530          return VK_EVENT_RESET;
2531 
2532       result =
2533          vk_sync_wait(&device->vk, event->sync, 0U, VK_SYNC_WAIT_COMPLETE, 0);
2534       result = (result == VK_SUCCESS) ? VK_EVENT_RESET : VK_EVENT_SET;
2535       break;
2536 
2537    case PVR_EVENT_STATE_SET_BY_HOST:
2538       result = VK_EVENT_SET;
2539       break;
2540 
2541    case PVR_EVENT_STATE_RESET_BY_HOST:
2542       result = VK_EVENT_RESET;
2543       break;
2544 
2545    default:
2546       unreachable("Event object in unknown state");
2547    }
2548 
2549    return result;
2550 }
2551 
2552 VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
2553 {
2554    PVR_FROM_HANDLE(pvr_event, event, _event);
2555 
2556    if (event->sync) {
2557       PVR_FROM_HANDLE(pvr_device, device, _device);
2558 
2559       const VkResult result = vk_sync_signal(&device->vk, event->sync, 0);
2560       if (result != VK_SUCCESS)
2561          return result;
2562    }
2563 
2564    event->state = PVR_EVENT_STATE_SET_BY_HOST;
2565 
2566    return VK_SUCCESS;
2567 }
2568 
2569 VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
2570 {
2571    PVR_FROM_HANDLE(pvr_event, event, _event);
2572 
2573    if (event->sync) {
2574       PVR_FROM_HANDLE(pvr_device, device, _device);
2575 
2576       const VkResult result = vk_sync_reset(&device->vk, event->sync);
2577       if (result != VK_SUCCESS)
2578          return result;
2579    }
2580 
2581    event->state = PVR_EVENT_STATE_RESET_BY_HOST;
2582 
2583    return VK_SUCCESS;
2584 }
2585 
2586 /* Buffer functions. */
2587 
2588 VkResult pvr_CreateBuffer(VkDevice _device,
2589                           const VkBufferCreateInfo *pCreateInfo,
2590                           const VkAllocationCallbacks *pAllocator,
2591                           VkBuffer *pBuffer)
2592 {
2593    PVR_FROM_HANDLE(pvr_device, device, _device);
2594    const uint32_t alignment = 4096;
2595    struct pvr_buffer *buffer;
2596 
2597    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
2598    assert(pCreateInfo->usage != 0);
2599 
2600    /* We check against (ULONG_MAX - alignment) to prevent overflow issues */
2601    if (pCreateInfo->size >= ULONG_MAX - alignment)
2602       return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
2603 
2604    buffer =
2605       vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
2606    if (!buffer)
2607       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2608 
2609    buffer->alignment = alignment;
2610 
2611    *pBuffer = pvr_buffer_to_handle(buffer);
2612 
2613    return VK_SUCCESS;
2614 }
2615 
2616 void pvr_DestroyBuffer(VkDevice _device,
2617                        VkBuffer _buffer,
2618                        const VkAllocationCallbacks *pAllocator)
2619 {
2620    PVR_FROM_HANDLE(pvr_device, device, _device);
2621    PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);
2622 
2623    if (!buffer)
2624       return;
2625 
2626    if (buffer->vma)
2627       pvr_unbind_memory(device, buffer->vma);
2628 
2629    vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
2630 }
2631 
2632 VkResult pvr_gpu_upload(struct pvr_device *device,
2633                         struct pvr_winsys_heap *heap,
2634                         const void *data,
2635                         size_t size,
2636                         uint64_t alignment,
2637                         struct pvr_suballoc_bo **const pvr_bo_out)
2638 {
2639    struct pvr_suballoc_bo *suballoc_bo = NULL;
2640    struct pvr_suballocator *allocator;
2641    VkResult result;
2642    void *map;
2643 
2644    assert(size > 0);
2645 
2646    if (heap == device->heaps.general_heap)
2647       allocator = &device->suballoc_general;
2648    else if (heap == device->heaps.pds_heap)
2649       allocator = &device->suballoc_pds;
2650    else if (heap == device->heaps.transfer_frag_heap)
2651       allocator = &device->suballoc_transfer;
2652    else if (heap == device->heaps.usc_heap)
2653       allocator = &device->suballoc_usc;
2654    else
2655       unreachable("Unknown heap type");
2656 
2657    result = pvr_bo_suballoc(allocator, size, alignment, false, &suballoc_bo);
2658    if (result != VK_SUCCESS)
2659       return result;
2660 
2661    map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2662    memcpy(map, data, size);
2663 
2664    *pvr_bo_out = suballoc_bo;
2665 
2666    return VK_SUCCESS;
2667 }
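/* A minimal usage sketch (illustrative, not from the original source; the
 * host_data buffer and its size are hypothetical):
 *
 *    struct pvr_suballoc_bo *bo;
 *    VkResult res = pvr_gpu_upload(device, device->heaps.general_heap,
 *                                  host_data, host_data_size, 16, &bo);
 *
 * where 16 is the required alignment in bytes. The heap argument selects the
 * matching suballocator; an unknown heap hits the unreachable() above.
 */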
2668 
2669 VkResult pvr_gpu_upload_usc(struct pvr_device *device,
2670                             const void *code,
2671                             size_t code_size,
2672                             uint64_t code_alignment,
2673                             struct pvr_suballoc_bo **const pvr_bo_out)
2674 {
2675    struct pvr_suballoc_bo *suballoc_bo = NULL;
2676    VkResult result;
2677    void *map;
2678 
2679    assert(code_size > 0);
2680 
2681    /* The USC will prefetch the next instruction, so over-allocate by one
2682     * instruction to prevent reading off the end of a page into a potentially
2683     * unallocated page.
2684     */
2685    result = pvr_bo_suballoc(&device->suballoc_usc,
2686                             code_size + ROGUE_MAX_INSTR_BYTES,
2687                             code_alignment,
2688                             false,
2689                             &suballoc_bo);
2690    if (result != VK_SUCCESS)
2691       return result;
2692 
2693    map = pvr_bo_suballoc_get_map_addr(suballoc_bo);
2694    memcpy(map, code, code_size);
2695 
2696    *pvr_bo_out = suballoc_bo;
2697 
2698    return VK_SUCCESS;
2699 }
2700 
2701 /**
2702  * \brief Upload PDS program data and code segments from host memory to device
2703  * memory.
2704  *
2705  * \param[in] device            Logical device pointer.
2706  * \param[in] data              Pointer to PDS data segment to upload.
2707  * \param[in] data_size_dwords  Size of PDS data segment in dwords.
2708  * \param[in] data_alignment    Required alignment of the PDS data segment in
2709  *                              bytes. Must be a power of two.
2710  * \param[in] code              Pointer to PDS code segment to upload.
2711  * \param[in] code_size_dwords  Size of PDS code segment in dwords.
2712  * \param[in] code_alignment    Required alignment of the PDS code segment in
2713  *                              bytes. Must be a power of two.
2714  * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
2715  *                              program in bytes.
2716  * \param[out] pds_upload_out   On success will be initialized based on the
2717  *                              uploaded PDS program.
2718  * \return VK_SUCCESS on success, or error code otherwise.
2719  */
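/* Worked example of the layout computed below (illustrative numbers only):
 * with data_size_dwords = 10 (40 B), data_alignment = 16, code_alignment = 16
 * and both segments present, data_aligned_size = ALIGN_POT(40, 16) = 48, so
 * code_offset = 48 and the code segment starts 48 bytes into the bo;
 * bo_size = 48 + code_aligned_size.
 */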
2720 VkResult pvr_gpu_upload_pds(struct pvr_device *device,
2721                             const uint32_t *data,
2722                             uint32_t data_size_dwords,
2723                             uint32_t data_alignment,
2724                             const uint32_t *code,
2725                             uint32_t code_size_dwords,
2726                             uint32_t code_alignment,
2727                             uint64_t min_alignment,
2728                             struct pvr_pds_upload *const pds_upload_out)
2729 {
2730    /* All alignment and sizes below are in bytes. */
2731    const size_t data_size = PVR_DW_TO_BYTES(data_size_dwords);
2732    const size_t code_size = PVR_DW_TO_BYTES(code_size_dwords);
2733    const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
2734    const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
2735    const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
2736    const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
2737    const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
2738                                      : data_aligned_size;
2739    VkResult result;
2740    void *map;
2741 
2742    assert(code || data);
2743    assert(!code || (code_size_dwords != 0 && code_alignment != 0));
2744    assert(!data || (data_size_dwords != 0 && data_alignment != 0));
2745 
2746    result = pvr_bo_suballoc(&device->suballoc_pds,
2747                             bo_size,
2748                             bo_alignment,
2749                             true,
2750                             &pds_upload_out->pvr_bo);
2751    if (result != VK_SUCCESS)
2752       return result;
2753 
2754    map = pvr_bo_suballoc_get_map_addr(pds_upload_out->pvr_bo);
2755 
2756    if (data) {
2757       memcpy(map, data, data_size);
2758 
2759       pds_upload_out->data_offset = pds_upload_out->pvr_bo->dev_addr.addr -
2760                                     device->heaps.pds_heap->base_addr.addr;
2761 
2762       /* Store data size in dwords. */
2763       assert(data_aligned_size % 4 == 0);
2764       pds_upload_out->data_size = data_aligned_size / 4;
2765    } else {
2766       pds_upload_out->data_offset = 0;
2767       pds_upload_out->data_size = 0;
2768    }
2769 
2770    if (code) {
2771       memcpy((uint8_t *)map + code_offset, code, code_size);
2772 
2773       pds_upload_out->code_offset =
2774          (pds_upload_out->pvr_bo->dev_addr.addr + code_offset) -
2775          device->heaps.pds_heap->base_addr.addr;
2776 
2777       /* Store code size in dwords. */
2778       assert(code_aligned_size % 4 == 0);
2779       pds_upload_out->code_size = code_aligned_size / 4;
2780    } else {
2781       pds_upload_out->code_offset = 0;
2782       pds_upload_out->code_size = 0;
2783    }
2784 
2785    return VK_SUCCESS;
2786 }
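/* A worked example of the layout computed above, with illustrative sizes
 * rather than values from a real PDS program:
 *
 *    data_size = 20 bytes, data_alignment = 16
 *    code_size = 40 bytes, code_alignment = 64
 *
 *    data_aligned_size = ALIGN_POT(20, 16) = 32
 *    code_offset       = ALIGN_POT(32, 64) = 64
 *    code_aligned_size = ALIGN_POT(40, 64) = 64
 *    bo_size           = 64 + 64 = 128
 *
 * The offsets stored in pds_upload_out are heap-relative (device address
 * minus the PDS heap base address), and the stored sizes are the aligned
 * sizes converted back to dwords (here 32 / 4 = 8 and 64 / 4 = 16).
 */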
2787 
2788 static VkResult
2789 pvr_framebuffer_create_ppp_state(struct pvr_device *device,
2790                                  struct pvr_framebuffer *framebuffer)
2791 {
2792    const uint32_t cache_line_size =
2793       rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
2794    uint32_t ppp_state[3];
2795    VkResult result;
2796 
2797    pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
2798       header.pres_terminate = true;
2799    }
2800 
2801    pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
2802       term0.clip_right =
2803          DIV_ROUND_UP(
2804             framebuffer->width,
2805             PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
2806          1;
2807       term0.clip_bottom =
2808          DIV_ROUND_UP(
2809             framebuffer->height,
2810             PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
2811          1;
2812    }
2813 
2814    pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
2815       term1.render_target = 0;
2816       term1.clip_left = 0;
2817    }
2818 
2819    result = pvr_gpu_upload(device,
2820                            device->heaps.general_heap,
2821                            ppp_state,
2822                            sizeof(ppp_state),
2823                            cache_line_size,
2824                            &framebuffer->ppp_state_bo);
2825    if (result != VK_SUCCESS)
2826       return result;
2827 
2828    /* Calculate the size of PPP state in dwords. */
2829    framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);
2830 
2831    return VK_SUCCESS;
2832 }
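/* A sketch of the terminate clip arithmetic above, assuming a hypothetical
 * block size of 16 pixels (the real value comes from the
 * TA_STATE_TERMINATE0_*_BLOCK_SIZE_IN_PIXELS defines):
 *
 *    width  = 1920  =>  term0.clip_right  = DIV_ROUND_UP(1920, 16) - 1 = 119
 *    height = 1080  =>  term0.clip_bottom = DIV_ROUND_UP(1080, 16) - 1 = 67
 *
 * i.e. the terminate words hold the framebuffer extent in inclusive block
 * coordinates.
 */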
2833 
2834 static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
2835                                     uint32_t render_targets_count)
2836 {
2837    uint32_t i;
2838 
2839    for (i = 0; i < render_targets_count; i++) {
2840       if (pthread_mutex_init(&render_targets[i].mutex, NULL))
2841          goto err_mutex_destroy;
2842    }
2843 
2844    return true;
2845 
2846 err_mutex_destroy:
2847    while (i--)
2848       pthread_mutex_destroy(&render_targets[i].mutex);
2849 
2850    return false;
2851 }
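/* The function above uses the usual partial-unwind idiom: on failure, i is
 * the index of the mutex that failed to initialize, so "while (i--)"
 * destroys exactly the mutexes [0, i) that did initialize, in reverse
 * order. The same generic shape, sketched with hypothetical init()/fini()
 * helpers:
 *
 *    for (i = 0; i < count; i++) {
 *       if (init(&objs[i]))
 *          goto err;
 *    }
 *    return true;
 *
 * err:
 *    while (i--)
 *       fini(&objs[i]);
 *    return false;
 */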
2852 
2853 static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
2854                                     uint32_t render_targets_count)
2855 {
2856    for (uint32_t i = 0; i < render_targets_count; i++) {
2857       if (render_targets[i].valid) {
2858          pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
2859          render_targets[i].valid = false;
2860       }
2861 
2862       pthread_mutex_destroy(&render_targets[i].mutex);
2863    }
2864 }
2865 
2866 VkResult pvr_CreateFramebuffer(VkDevice _device,
2867                                const VkFramebufferCreateInfo *pCreateInfo,
2868                                const VkAllocationCallbacks *pAllocator,
2869                                VkFramebuffer *pFramebuffer)
2870 {
2871    PVR_FROM_HANDLE(pvr_render_pass, pass, pCreateInfo->renderPass);
2872    PVR_FROM_HANDLE(pvr_device, device, _device);
2873    struct pvr_spm_bgobj_state *spm_bgobj_state_per_render;
2874    struct pvr_spm_eot_state *spm_eot_state_per_render;
2875    struct pvr_render_target *render_targets;
2876    struct pvr_framebuffer *framebuffer;
2877    struct pvr_image_view **attachments;
2878    uint32_t render_targets_count;
2879    uint64_t scratch_buffer_size;
2880    VkResult result;
2881 
2882    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
2883 
2884    render_targets_count =
2885       PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);
2886 
2887    VK_MULTIALLOC(ma);
2888    vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
2889    vk_multialloc_add(&ma,
2890                      &attachments,
2891                      __typeof__(*attachments),
2892                      pCreateInfo->attachmentCount);
2893    vk_multialloc_add(&ma,
2894                      &render_targets,
2895                      __typeof__(*render_targets),
2896                      render_targets_count);
2897    vk_multialloc_add(&ma,
2898                      &spm_eot_state_per_render,
2899                      __typeof__(*spm_eot_state_per_render),
2900                      pass->hw_setup->render_count);
2901    vk_multialloc_add(&ma,
2902                      &spm_bgobj_state_per_render,
2903                      __typeof__(*spm_bgobj_state_per_render),
2904                      pass->hw_setup->render_count);
2905 
2906    if (!vk_multialloc_zalloc2(&ma,
2907                               &device->vk.alloc,
2908                               pAllocator,
2909                               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
2910       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2911 
2912    vk_object_base_init(&device->vk,
2913                        &framebuffer->base,
2914                        VK_OBJECT_TYPE_FRAMEBUFFER);
2915 
2916    framebuffer->width = pCreateInfo->width;
2917    framebuffer->height = pCreateInfo->height;
2918    framebuffer->layers = pCreateInfo->layers;
2919 
2920    framebuffer->attachments = attachments;
2921    framebuffer->attachment_count = pCreateInfo->attachmentCount;
2922    for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
2923       framebuffer->attachments[i] =
2924          pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
2925    }
2926 
2927    result = pvr_framebuffer_create_ppp_state(device, framebuffer);
2928    if (result != VK_SUCCESS)
2929       goto err_free_framebuffer;
2930 
2931    framebuffer->render_targets = render_targets;
2932    framebuffer->render_targets_count = render_targets_count;
2933    if (!pvr_render_targets_init(framebuffer->render_targets,
2934                                 render_targets_count)) {
2935       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
2936       goto err_free_ppp_state_bo;
2937    }
2938 
2939    scratch_buffer_size =
2940       pvr_spm_scratch_buffer_calc_required_size(pass,
2941                                                 framebuffer->width,
2942                                                 framebuffer->height);
2943 
2944    result = pvr_spm_scratch_buffer_get_buffer(device,
2945                                               scratch_buffer_size,
2946                                               &framebuffer->scratch_buffer);
2947    if (result != VK_SUCCESS)
2948       goto err_finish_render_targets;
2949 
2950    for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
2951       uint32_t emit_count;
2952 
2953       result = pvr_spm_init_eot_state(device,
2954                                       &spm_eot_state_per_render[i],
2955                                       framebuffer,
2956                                       &pass->hw_setup->renders[i],
2957                                       &emit_count);
2958       if (result != VK_SUCCESS)
2959          goto err_finish_eot_state;
2960 
2961       result = pvr_spm_init_bgobj_state(device,
2962                                         &spm_bgobj_state_per_render[i],
2963                                         framebuffer,
2964                                         &pass->hw_setup->renders[i],
2965                                         emit_count);
2966       if (result != VK_SUCCESS)
2967          goto err_finish_bgobj_state;
2968 
2969       continue;
2970 
2971 err_finish_bgobj_state:
2972       pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[i]);
2973 
2974       for (uint32_t j = 0; j < i; j++)
2975          pvr_spm_finish_bgobj_state(device, &spm_bgobj_state_per_render[j]);
2976 
2977 err_finish_eot_state:
2978       for (uint32_t j = 0; j < i; j++)
2979          pvr_spm_finish_eot_state(device, &spm_eot_state_per_render[j]);
2980 
2981       goto err_finish_render_targets;
2982    }
2983 
2984    framebuffer->render_count = pass->hw_setup->render_count;
2985    framebuffer->spm_eot_state_per_render = spm_eot_state_per_render;
2986    framebuffer->spm_bgobj_state_per_render = spm_bgobj_state_per_render;
2987 
2988    *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);
2989 
2990    return VK_SUCCESS;
2991 
2992 err_finish_render_targets:
2993    pvr_render_targets_fini(framebuffer->render_targets, render_targets_count);
2994 
2995 err_free_ppp_state_bo:
2996    pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
2997 
2998 err_free_framebuffer:
2999    vk_object_base_finish(&framebuffer->base);
3000    vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3001 
3002    return result;
3003 }
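/* Two idioms above are worth noting. VK_MULTIALLOC packs the framebuffer
 * and its trailing arrays (attachments, render targets and the per-render
 * SPM EOT/background-object state) into a single zeroed host allocation,
 * so one vk_free2() releases everything on the error paths and in
 * pvr_DestroyFramebuffer(). And the per-render loop keeps its cleanup
 * labels inside the loop body, skipped by "continue" on success, so a
 * failure at render i unwinds whatever state renders [0, i] had already
 * initialized before jumping to the shared err_finish_render_targets path.
 */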
3004 
3005 void pvr_DestroyFramebuffer(VkDevice _device,
3006                             VkFramebuffer _fb,
3007                             const VkAllocationCallbacks *pAllocator)
3008 {
3009    PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);
3010    PVR_FROM_HANDLE(pvr_device, device, _device);
3011 
3012    if (!framebuffer)
3013       return;
3014 
3015    for (uint32_t i = 0; i < framebuffer->render_count; i++) {
3016       pvr_spm_finish_bgobj_state(device,
3017                                  &framebuffer->spm_bgobj_state_per_render[i]);
3018 
3019       pvr_spm_finish_eot_state(device,
3020                                &framebuffer->spm_eot_state_per_render[i]);
3021    }
3022 
3023    pvr_spm_scratch_buffer_release(device, framebuffer->scratch_buffer);
3024    pvr_render_targets_fini(framebuffer->render_targets,
3025                            framebuffer->render_targets_count);
3026    pvr_bo_suballoc_free(framebuffer->ppp_state_bo);
3027    vk_object_base_finish(&framebuffer->base);
3028    vk_free2(&device->vk.alloc, pAllocator, framebuffer);
3029 }
3030 
3031 static uint32_t
3032 pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
3033                                   VkFilter filter)
3034 {
3035    switch (filter) {
3036    case VK_FILTER_NEAREST:
3037       return PVRX(TEXSTATE_FILTER_POINT);
3038    case VK_FILTER_LINEAR:
3039       return PVRX(TEXSTATE_FILTER_LINEAR);
3040    default:
3041       unreachable("Unknown filter type.");
3042    }
3043 }
3044 
3045 static uint32_t
3046 pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
3047 {
3048    switch (addr_mode) {
3049    case VK_SAMPLER_ADDRESS_MODE_REPEAT:
3050       return PVRX(TEXSTATE_ADDRMODE_REPEAT);
3051    case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
3052       return PVRX(TEXSTATE_ADDRMODE_FLIP);
3053    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
3054       return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
3055    case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
3056       return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
3057    case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
3058       return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
3059    default:
3060       unreachable("Invalid sampler address mode.");
3061    }
3062 }
3063 
3064 VkResult pvr_CreateSampler(VkDevice _device,
3065                            const VkSamplerCreateInfo *pCreateInfo,
3066                            const VkAllocationCallbacks *pAllocator,
3067                            VkSampler *pSampler)
3068 {
3069    PVR_FROM_HANDLE(pvr_device, device, _device);
3070    uint32_t border_color_table_index;
3071    struct pvr_sampler *sampler;
3072    float lod_rounding_bias;
3073    VkFilter min_filter;
3074    VkFilter mag_filter;
3075    VkResult result;
3076    float min_lod;
3077    float max_lod;
3078 
3079    STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
3080                  sizeof(((union pvr_sampler_descriptor *)NULL)->words));
3081 
3082    sampler =
3083       vk_sampler_create(&device->vk, pCreateInfo, pAllocator, sizeof(*sampler));
3084    if (!sampler) {
3085       result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
3086       goto err_out;
3087    }
3088 
3089    mag_filter = pCreateInfo->magFilter;
3090    min_filter = pCreateInfo->minFilter;
3091 
3092    result =
3093       pvr_border_color_table_get_or_create_entry(&device->border_color_table,
3094                                                  sampler,
3095                                                  &border_color_table_index);
3096    if (result != VK_SUCCESS)
3097       goto err_free_sampler;
3098 
3099    if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
3100       /* The min/mag filters may need adjustment here: the GPU should decide
3101        * which of the two filters to use based on the clamped LOD value: LOD
3102        * <= 0 implies magnification, while LOD > 0 implies minification.
3103        *
3104        * As a workaround, we override magFilter with minFilter if we know that
3105        * the magnification filter will never be used due to clamping anyway
3106        * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
3107        * if maxLod <= 0.
3108        */
3109       if (pCreateInfo->minLod > 0.0f) {
3110          /* The clamped LOD will always be positive => always minify. */
3111          mag_filter = pCreateInfo->minFilter;
3112       }
3113 
3114       if (pCreateInfo->maxLod <= 0.0f) {
3115          /* The clamped LOD will always be negative or zero => always
3116           * magnify.
3117           */
3118          min_filter = pCreateInfo->magFilter;
3119       }
3120    }
3121 
3122    if (pCreateInfo->compareEnable) {
3123       sampler->descriptor.data.compare_op =
3124          (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
3125    } else {
3126       sampler->descriptor.data.compare_op =
3127          (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
3128    }
3129 
3130    sampler->descriptor.data.word3 = 0;
3131    pvr_csb_pack (&sampler->descriptor.data.sampler_word,
3132                  TEXSTATE_SAMPLER,
3133                  word) {
3134       const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
3135       const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
3136                                   (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3137       const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
3138                                          PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
3139                                 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
3140       const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
3141                                          PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
3142                                 (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
3143 
3144       word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
3145       word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);
3146 
3147       if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
3148          word.mipfilter = true;
3149 
3150       word.addrmode_u =
3151          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
3152       word.addrmode_v =
3153          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
3154       word.addrmode_w =
3155          pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);
3156 
3157       /* TODO: Figure out defines for these. */
3158       if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
3159          sampler->descriptor.data.word3 |= 0x40000000;
3160 
3161       if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
3162          sampler->descriptor.data.word3 |= 0x20000000;
3163 
3164       /* The Vulkan 1.0.205 spec says:
3165        *
3166        *    The absolute value of mipLodBias must be less than or equal to
3167        *    VkPhysicalDeviceLimits::maxSamplerLodBias.
3168        */
3169       word.dadjust =
3170          PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
3171          util_signed_fixed(
3172             CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
3173             PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
3174 
3175       /* Anisotropy is not supported for now. */
3176       word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
3177 
3178       if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
3179           pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
3180          /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
3181           * selected by adding 0.5 and then truncating the input LOD value.
3182           * This hardware adds the 0.5 bias before clamping against
3183           * lodmin/lodmax, while Vulkan specifies the bias to be added after
3184           * clamping. We compensate for this difference by adding the 0.5
3185           * bias to the LOD bounds, too.
3186           */
3187          lod_rounding_bias = 0.5f;
3188       } else {
3189          lod_rounding_bias = 0.0f;
3190       }
3191 
3192       min_lod = pCreateInfo->minLod + lod_rounding_bias;
3193       word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
3194                                         PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3195 
3196       max_lod = pCreateInfo->maxLod + lod_rounding_bias;
3197       word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
3198                                         PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
3199 
3200       word.bordercolor_index = border_color_table_index;
3201 
3202       if (pCreateInfo->unnormalizedCoordinates)
3203          word.non_normalized_coords = true;
3204    }
3205 
3206    *pSampler = pvr_sampler_to_handle(sampler);
3207 
3208    return VK_SUCCESS;
3209 
3210 err_free_sampler:
3211    vk_object_free(&device->vk, pAllocator, sampler);
3212 
3213 err_out:
3214    return result;
3215 }
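/* A sketch of the fixed-point conversions above, assuming a hypothetical
 * 8-bit fractional part (the real widths come from
 * TEXSTATE_CLAMP_FRACTIONAL_BITS and TEXSTATE_DADJUST_FRACTIONAL_BITS):
 *
 *    minLod = 1.5       =>  word.minlod  = util_unsigned_fixed(1.5, 8)
 *                                        = 1.5 * 256 = 384
 *    mipLodBias = -0.25 =>  word.dadjust = TEXSTATE_DADJUST_ZERO_UINT +
 *                                          util_signed_fixed(-0.25, 8)
 *                                        = TEXSTATE_DADJUST_ZERO_UINT - 64
 *
 * LOD clamps are unsigned fixed-point values, while the LOD bias is stored
 * biased around TEXSTATE_DADJUST_ZERO_UINT so that negative biases remain
 * representable.
 */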
3216 
3217 void pvr_DestroySampler(VkDevice _device,
3218                         VkSampler _sampler,
3219                         const VkAllocationCallbacks *pAllocator)
3220 {
3221    PVR_FROM_HANDLE(pvr_device, device, _device);
3222    PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);
3223 
3224    if (!sampler)
3225       return;
3226 
3227    vk_sampler_destroy(&device->vk, pAllocator, &sampler->vk);
3228 }
3229 
3230 void pvr_GetBufferMemoryRequirements2(
3231    VkDevice _device,
3232    const VkBufferMemoryRequirementsInfo2 *pInfo,
3233    VkMemoryRequirements2 *pMemoryRequirements)
3234 {
3235    PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
3236    PVR_FROM_HANDLE(pvr_device, device, _device);
3237    uint64_t size;
3238 
3239    /* The Vulkan 1.0.166 spec says:
3240     *
3241     *    memoryTypeBits is a bitmask and contains one bit set for every
3242     *    supported memory type for the resource. Bit 'i' is set if and only
3243     *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3244     *    structure for the physical device is supported for the resource.
3245     *
3246     * All types are currently supported for buffers.
3247     */
3248    pMemoryRequirements->memoryRequirements.memoryTypeBits =
3249       (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3250 
3251    pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
3252 
3253    size = buffer->vk.size;
3254 
3255    if (size % device->ws->page_size == 0 ||
3256        size % device->ws->page_size >
3257           device->ws->page_size - PVR_BUFFER_MEMORY_PADDING_SIZE) {
3258       /* TODO: We can save memory by having one extra virtual page mapped
3259        * in and having the first and last virtual page mapped to the first
3260        * physical address.
3261        */
3262       size += PVR_BUFFER_MEMORY_PADDING_SIZE;
3263    }
3264 
3265    pMemoryRequirements->memoryRequirements.size =
3266       ALIGN_POT(size, buffer->alignment);
3267 }
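/* A sketch of when the padding above applies, assuming a hypothetical
 * 4096-byte page size and a 1024-byte PVR_BUFFER_MEMORY_PADDING_SIZE
 * (threshold = 4096 - 1024 = 3072):
 *
 *    size = 8192: 8192 % 4096 == 0             =>  padded to 9216
 *    size = 8000: 8000 % 4096 == 3904 > 3072   =>  padded to 9024
 *    size = 6000: 6000 % 4096 == 1904 <= 3072  =>  not padded
 *
 * Padding is added whenever the buffer ends on, or within
 * PVR_BUFFER_MEMORY_PADDING_SIZE of, a page boundary, so reads slightly
 * past the end of the buffer stay within mapped pages.
 */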
3268 
3269 void pvr_GetImageMemoryRequirements2(VkDevice _device,
3270                                      const VkImageMemoryRequirementsInfo2 *pInfo,
3271                                      VkMemoryRequirements2 *pMemoryRequirements)
3272 {
3273    PVR_FROM_HANDLE(pvr_device, device, _device);
3274    PVR_FROM_HANDLE(pvr_image, image, pInfo->image);
3275 
3276    /* The Vulkan 1.0.166 spec says:
3277     *
3278     *    memoryTypeBits is a bitmask and contains one bit set for every
3279     *    supported memory type for the resource. Bit 'i' is set if and only
3280     *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
3281     *    structure for the physical device is supported for the resource.
3282     *
3283     * All types are currently supported for images.
3284     */
3285    const uint32_t memory_types =
3286       (1ul << device->pdevice->memory.memoryTypeCount) - 1;
3287 
3288    /* TODO: The returned size is aligned here in case of arrays/CEM (as is done
3289     * in GetImageMemoryRequirements()), but this should be known at image
3290     * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
3291     * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
3292     * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
3293     * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
3294     *
3295     * Note: Presumably the 4096 alignment requirement comes from the Vulkan
3296     * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
3297     * render and compute jobs.
3298     */
3299    pMemoryRequirements->memoryRequirements.alignment = image->alignment;
3300    pMemoryRequirements->memoryRequirements.size =
3301       align64(image->size, image->alignment);
3302    pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
3303 }
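/* The memoryTypeBits value used for both buffers and images above is an
 * all-ones mask over the advertised memory types, e.g. with a hypothetical
 * memoryTypeCount of 2:
 *
 *    (1ul << 2) - 1 = 0x3   (memory types 0 and 1 both supported)
 */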
3304