/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * based in part on v3dv driver which is:
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <vulkan/vulkan.h>
#include <xf86drm.h>

#include "hwdef/rogue_hw_utils.h"
#include "pipe/p_defines.h"
#include "pvr_bo.h"
#include "pvr_csb.h"
#include "pvr_csb_enum_helpers.h"
#include "pvr_debug.h"
#include "pvr_device_info.h"
#include "pvr_hardcode.h"
#include "pvr_job_render.h"
#include "pvr_limits.h"
#include "pvr_nop_usc.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_tex_state.h"
#include "pvr_types.h"
#include "pvr_winsys.h"
#include "rogue/rogue_compiler.h"
#include "util/build_id.h"
#include "util/log.h"
#include "util/mesa-sha1.h"
#include "util/os_misc.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_log.h"
#include "vk_object.h"
#include "vk_util.h"

#define PVR_GLOBAL_FREE_LIST_INITIAL_SIZE (2U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_MAX_SIZE (256U * 1024U * 1024U)
#define PVR_GLOBAL_FREE_LIST_GROW_SIZE (1U * 1024U * 1024U)

/* The grow threshold is a percentage. This is intended to be 12.5%, but has
 * been rounded up since the percentage is treated as an integer.
 */
#define PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD 13U
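/* A worked example, assuming the threshold is applied as a percentage of the
 * current free list size: for the 2 MiB initial size above, a true 12.5%
 * threshold would be 256 KiB, whereas the integer 13% used here gives
 * roughly 266 KiB.
 */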

#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
#   define PVR_USE_WSI_PLATFORM
#endif

#define PVR_API_VERSION VK_MAKE_VERSION(1, 0, VK_HEADER_VERSION)

#define DEF_DRIVER(str_name)                        \
   {                                                \
      .name = str_name, .len = sizeof(str_name) - 1 \
   }
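/* For example, DEF_DRIVER("mediatek-drm") expands to
 * { .name = "mediatek-drm", .len = 12 }, i.e. the string length excluding
 * the terminating NUL.
 */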

struct pvr_drm_device_info {
   const char *name;
   size_t len;
};

/* This is the list of supported DRM display drivers. */
static const struct pvr_drm_device_info pvr_display_devices[] = {
   DEF_DRIVER("mediatek-drm"),
   DEF_DRIVER("ti,am65x-dss"),
};

/* This is the list of supported DRM render drivers. */
static const struct pvr_drm_device_info pvr_render_devices[] = {
   DEF_DRIVER("mediatek,mt8173-gpu"),
   DEF_DRIVER("ti,am62-gpu"),
};

#undef DEF_DRIVER

static const struct vk_instance_extension_table pvr_instance_extensions = {
#if defined(VK_USE_PLATFORM_DISPLAY_KHR)
   .KHR_display = true,
#endif
   .KHR_external_memory_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
#if defined(PVR_USE_WSI_PLATFORM)
   .KHR_surface = true,
#endif
   .EXT_debug_report = true,
   .EXT_debug_utils = true,
};
static void pvr_physical_device_get_supported_extensions(
   const struct pvr_physical_device *pdevice,
   struct vk_device_extension_table *extensions)
{
   /* clang-format off */
   *extensions = (struct vk_device_extension_table){
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
#if defined(PVR_USE_WSI_PLATFORM)
      .KHR_swapchain = true,
#endif
      .EXT_external_memory_dma_buf = true,
      .EXT_private_data = true,
   };
   /* clang-format on */
}

VkResult pvr_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = PVR_API_VERSION;
   return VK_SUCCESS;
}

VkResult
pvr_EnumerateInstanceExtensionProperties(const char *pLayerName,
                                         uint32_t *pPropertyCount,
                                         VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&pvr_instance_extensions,
                                                     pPropertyCount,
                                                     pProperties);
}

VkResult pvr_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkInstance *pInstance)
{
   struct vk_instance_dispatch_table dispatch_table;
   struct pvr_instance *instance;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_alloc(pAllocator,
                       sizeof(*instance),
                       8,
                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &pvr_instance_entrypoints,
                                               true);

   vk_instance_dispatch_table_from_entrypoints(&dispatch_table,
                                               &wsi_instance_entrypoints,
                                               false);

   result = vk_instance_init(&instance->vk,
                             &pvr_instance_extensions,
                             &dispatch_table,
                             pCreateInfo,
                             pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(NULL, result);
   }

   pvr_process_debug_variable();

   instance->physical_devices_count = -1;

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = pvr_instance_to_handle(instance);

   return VK_SUCCESS;
}

static void pvr_physical_device_finish(struct pvr_physical_device *pdevice)
{
   /* Be careful here. The device might not have been initialized. This can
    * happen since initialization is done in vkEnumeratePhysicalDevices() but
    * finish is done in vkDestroyInstance(). Make sure that you check for NULL
    * before freeing or that the freeing functions accept NULL pointers.
    */

   if (pdevice->compiler)
      rogue_compiler_destroy(pdevice->compiler);

   pvr_wsi_finish(pdevice);

   free(pdevice->name);

   if (pdevice->ws)
      pvr_winsys_destroy(pdevice->ws);

   if (pdevice->master_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);
      close(pdevice->master_fd);
   }

   if (pdevice->render_fd >= 0) {
      vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);
      close(pdevice->render_fd);
   }

   vk_physical_device_finish(&pdevice->vk);
}

void pvr_DestroyInstance(VkInstance _instance,
                         const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);

   if (!instance)
      return;

   pvr_physical_device_finish(&instance->physical_device);

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

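/* Derive a pipeline cache UUID from the driver build-id and the packed BVNC
 * so that caches are invalidated whenever either the driver binary or the
 * target GPU changes.
 */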
static VkResult
pvr_physical_device_init_uuids(struct pvr_physical_device *pdevice)
{
   struct mesa_sha1 sha1_ctx;
   unsigned build_id_len;
   uint8_t sha1[20];
   uint64_t bvnc;

   const struct build_id_note *note =
      build_id_find_nhdr_for_addr(pvr_physical_device_init_uuids);
   if (!note) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Failed to find build-id");
   }

   build_id_len = build_id_length(note);
   if (build_id_len < 20) {
      return vk_errorf(pdevice,
                       VK_ERROR_INITIALIZATION_FAILED,
                       "Build-id too short. It needs to be a SHA");
   }

   bvnc = pvr_get_packed_bvnc(&pdevice->dev_info);

   _mesa_sha1_init(&sha1_ctx);
   _mesa_sha1_update(&sha1_ctx, build_id_data(note), build_id_len);
   _mesa_sha1_update(&sha1_ctx, &bvnc, sizeof(bvnc));
   _mesa_sha1_final(&sha1_ctx, sha1);
   memcpy(pdevice->pipeline_cache_uuid, sha1, VK_UUID_SIZE);

   return VK_SUCCESS;
}

static uint64_t pvr_compute_heap_size(void)
{
   /* Query the total RAM from the system. */
   uint64_t total_ram;
   if (!os_get_total_physical_memory(&total_ram))
      return 0;

   /* We don't want to burn too much RAM with the GPU. If the user has 4 GiB
    * or less, we use at most half. If they have more than 4 GiB, we use 3/4.
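    *
    * For example, a 4 GiB system gets a 2 GiB heap, while an 8 GiB system
    * gets a 6 GiB heap.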
    */
   uint64_t available_ram;
   if (total_ram <= 4ULL * 1024ULL * 1024ULL * 1024ULL)
      available_ram = total_ram / 2U;
   else
      available_ram = total_ram * 3U / 4U;

   return available_ram;
}

static VkResult pvr_physical_device_init(struct pvr_physical_device *pdevice,
                                         struct pvr_instance *instance,
                                         drmDevicePtr drm_render_device,
                                         drmDevicePtr drm_primary_device)
{
   const char *path = drm_render_device->nodes[DRM_NODE_RENDER];
   struct vk_device_extension_table supported_extensions;
   struct vk_physical_device_dispatch_table dispatch_table;
   const char *primary_path;
   VkResult result;
   int ret;

   if (!getenv("PVR_I_WANT_A_BROKEN_VULKAN_DRIVER")) {
      return vk_errorf(instance,
                       VK_ERROR_INCOMPATIBLE_DRIVER,
                       "WARNING: powervr is not a conformant Vulkan "
                       "implementation. Pass "
                       "PVR_I_WANT_A_BROKEN_VULKAN_DRIVER=1 if you know "
                       "what you're doing.");
   }

   pvr_physical_device_get_supported_extensions(pdevice,
                                                &supported_extensions);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &pvr_physical_device_entrypoints,
      true);

   vk_physical_device_dispatch_table_from_entrypoints(
      &dispatch_table,
      &wsi_physical_device_entrypoints,
      false);

   result = vk_physical_device_init(&pdevice->vk,
                                    &instance->vk,
                                    &supported_extensions,
                                    &dispatch_table);
   if (result != VK_SUCCESS)
      return result;

   pdevice->instance = instance;

   pdevice->render_fd = open(path, O_RDWR | O_CLOEXEC);
   if (pdevice->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Failed to open device %s",
                         path);
      goto err_vk_physical_device_finish;
   }

   pdevice->render_path = vk_strdup(&pdevice->vk.instance->alloc,
                                    path,
                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!pdevice->render_path) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto err_close_render_fd;
   }

   if (instance->vk.enabled_extensions.KHR_display) {
      primary_path = drm_primary_device->nodes[DRM_NODE_PRIMARY];

      pdevice->master_fd = open(primary_path, O_RDWR | O_CLOEXEC);
   } else {
      pdevice->master_fd = -1;
   }

   if (pdevice->master_fd >= 0) {
      pdevice->master_path = vk_strdup(&pdevice->vk.instance->alloc,
                                       primary_path,
                                       VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
      if (!pdevice->master_path) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto err_close_master_fd;
      }
   } else {
      pdevice->master_path = NULL;
   }

   pdevice->ws = pvr_winsys_create(pdevice->master_fd,
                                   pdevice->render_fd,
                                   &pdevice->vk.instance->alloc);
   if (!pdevice->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_vk_free_master_path;
   }

   pdevice->vk.supported_sync_types = pdevice->ws->sync_types;

   ret = pdevice->ws->ops->device_info_init(pdevice->ws,
                                            &pdevice->dev_info,
                                            &pdevice->dev_runtime_info);
   if (ret) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_pvr_winsys_destroy;
   }

   result = pvr_physical_device_init_uuids(pdevice);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   if (asprintf(&pdevice->name,
                "Imagination PowerVR %s %s",
                pdevice->dev_info.ident.series_name,
                pdevice->dev_info.ident.public_name) < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_OUT_OF_HOST_MEMORY,
                         "Unable to allocate memory to store device name");
      goto err_pvr_winsys_destroy;
   }

   /* Setup available memory heaps and types. */
   pdevice->memory.memoryHeapCount = 1;
   pdevice->memory.memoryHeaps[0].size = pvr_compute_heap_size();
   pdevice->memory.memoryHeaps[0].flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT;

   pdevice->memory.memoryTypeCount = 1;
   pdevice->memory.memoryTypes[0].propertyFlags =
      VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
      VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
      VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
   pdevice->memory.memoryTypes[0].heapIndex = 0;

   result = pvr_wsi_init(pdevice);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto err_free_name;
   }

   pdevice->compiler = rogue_compiler_create(&pdevice->dev_info);
   if (!pdevice->compiler) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to initialize Rogue compiler");
      goto err_wsi_finish;
   }

   return VK_SUCCESS;

err_wsi_finish:
   pvr_wsi_finish(pdevice);

err_free_name:
   free(pdevice->name);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(pdevice->ws);

err_vk_free_master_path:
   vk_free(&pdevice->vk.instance->alloc, pdevice->master_path);

err_close_master_fd:
   if (pdevice->master_fd >= 0)
      close(pdevice->master_fd);

   vk_free(&pdevice->vk.instance->alloc, pdevice->render_path);

err_close_render_fd:
   close(pdevice->render_fd);

err_vk_physical_device_finish:
   vk_physical_device_finish(&pdevice->vk);

   return result;
}

static bool pvr_drm_device_is_supported(drmDevicePtr drm_dev, int node_type)
{
   char **compat = drm_dev->deviceinfo.platform->compatible;

   if (!(drm_dev->available_nodes & BITFIELD_BIT(node_type))) {
      assert(node_type == DRM_NODE_RENDER || node_type == DRM_NODE_PRIMARY);
      return false;
   }

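   /* Match the device's "compatible" strings against the supported driver
    * lists. Note that strncmp() with the table entry's length makes this a
    * prefix match.
    */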
   if (node_type == DRM_NODE_RENDER) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_render_devices); i++) {
            const char *const name = pvr_render_devices[i].name;
            const size_t len = pvr_render_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   } else if (node_type == DRM_NODE_PRIMARY) {
      while (*compat) {
         for (size_t i = 0U; i < ARRAY_SIZE(pvr_display_devices); i++) {
            const char *const name = pvr_display_devices[i].name;
            const size_t len = pvr_display_devices[i].len;

            if (strncmp(*compat, name, len) == 0)
               return true;
         }

         compat++;
      }

      return false;
   }

   unreachable("Incorrect node_type.");
}

static VkResult pvr_enumerate_devices(struct pvr_instance *instance)
{
   /* FIXME: It should be possible to query the number of devices via
    * drmGetDevices2 by passing in NULL for the 'devices' parameter. However,
    * this was broken by libdrm commit
    * 8cb12a2528d795c45bba5f03b3486b4040fb0f45, so, until this is fixed in
    * upstream, hard-code the maximum number of devices.
    */
   drmDevicePtr drm_primary_device = NULL;
   drmDevicePtr drm_render_device = NULL;
   drmDevicePtr drm_devices[8];
   int max_drm_devices;
   VkResult result;

   instance->physical_devices_count = 0;

   max_drm_devices = drmGetDevices2(0, drm_devices, ARRAY_SIZE(drm_devices));
   if (max_drm_devices < 1)
      return VK_SUCCESS;

   for (unsigned i = 0; i < (unsigned)max_drm_devices; i++) {
      if (drm_devices[i]->bustype != DRM_BUS_PLATFORM)
         continue;

      if (pvr_drm_device_is_supported(drm_devices[i], DRM_NODE_RENDER)) {
         drm_render_device = drm_devices[i];

         mesa_logd("Found compatible render device '%s'.",
                   drm_render_device->nodes[DRM_NODE_RENDER]);
      } else if (pvr_drm_device_is_supported(drm_devices[i],
                                             DRM_NODE_PRIMARY)) {
         drm_primary_device = drm_devices[i];

         mesa_logd("Found compatible primary device '%s'.",
                   drm_primary_device->nodes[DRM_NODE_PRIMARY]);
      }
   }

   if (drm_render_device && drm_primary_device) {
      result = pvr_physical_device_init(&instance->physical_device,
                                        instance,
                                        drm_render_device,
                                        drm_primary_device);
      if (result == VK_SUCCESS)
         instance->physical_devices_count = 1;
      else if (result == VK_ERROR_INCOMPATIBLE_DRIVER)
         result = VK_SUCCESS;
   } else {
      result = VK_SUCCESS;
   }

   drmFreeDevices(drm_devices, max_drm_devices);

   return result;
}

VkResult pvr_EnumeratePhysicalDevices(VkInstance _instance,
                                      uint32_t *pPhysicalDeviceCount,
                                      VkPhysicalDevice *pPhysicalDevices)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice,
                          out,
                          pPhysicalDevices,
                          pPhysicalDeviceCount);
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   VkResult result;

   if (instance->physical_devices_count < 0) {
      result = pvr_enumerate_devices(instance);
      if (result != VK_SUCCESS)
         return result;
   }

   if (instance->physical_devices_count == 0)
      return VK_SUCCESS;

   assert(instance->physical_devices_count == 1);
   vk_outarray_append_typed (VkPhysicalDevice, &out, p) {
      *p = pvr_physical_device_to_handle(&instance->physical_device);
   }

   return vk_outarray_status(&out);
}

void pvr_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                    VkPhysicalDeviceFeatures2 *pFeatures)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pFeatures->features = (VkPhysicalDeviceFeatures){
      .robustBufferAccess =
         PVR_HAS_FEATURE(&pdevice->dev_info, robust_buffer_access),
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = false,
      .tessellationShader = false,
      .sampleRateShading = true,
      .dualSrcBlend = false,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = false,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = false,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = true,
      .textureCompressionASTC_LDR = PVR_HAS_FEATURE(&pdevice->dev_info, astc),
      .textureCompressionBC = false,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = false,
      .shaderImageGatherExtended = false,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = false,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .shaderResourceResidency = false,
      .shaderResourceMinLod = false,
      .sparseBinding = false,
      .sparseResidencyBuffer = false,
      .sparseResidencyImage2D = false,
      .sparseResidencyImage3D = false,
      .sparseResidency2Samples = false,
      .sparseResidency4Samples = false,
      .sparseResidency8Samples = false,
      .sparseResidency16Samples = false,
      .sparseResidencyAliased = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,
   };

   vk_foreach_struct (ext, pFeatures->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

/* TODO: See if this function can be improved once fully implemented. */
uint32_t pvr_calc_fscommon_size_and_tiles_in_flight(
   const struct pvr_physical_device *pdevice,
   uint32_t fs_common_size,
   uint32_t min_tiles_in_flight)
{
   const struct pvr_device_runtime_info *dev_runtime_info =
      &pdevice->dev_runtime_info;
   const struct pvr_device_info *dev_info = &pdevice->dev_info;
   uint32_t max_tiles_in_flight;
   uint32_t num_allocs;

   if (PVR_HAS_FEATURE(dev_info, s8xe)) {
      num_allocs = PVR_GET_FEATURE_VALUE(dev_info, num_raster_pipes, 0U);
   } else {
      uint32_t min_cluster_per_phantom = 0;

      if (dev_runtime_info->num_phantoms > 1) {
         pvr_finishme("Unimplemented path!!");
      } else {
         min_cluster_per_phantom =
            PVR_GET_FEATURE_VALUE(dev_info, num_clusters, 1U);
      }

      if (dev_runtime_info->num_phantoms > 1)
         pvr_finishme("Unimplemented path!!");

      if (dev_runtime_info->num_phantoms > 2)
         pvr_finishme("Unimplemented path!!");

      if (dev_runtime_info->num_phantoms > 3)
         pvr_finishme("Unimplemented path!!");

      if (min_cluster_per_phantom >= 4)
         num_allocs = 1;
      else if (min_cluster_per_phantom == 2)
         num_allocs = 2;
      else
         num_allocs = 4;
   }

   max_tiles_in_flight =
      PVR_GET_FEATURE_VALUE(dev_info, isp_max_tiles_in_flight, 1U);

   if (fs_common_size == UINT_MAX) {
      uint32_t max_common_size;

      num_allocs *= MIN2(min_tiles_in_flight, max_tiles_in_flight);

      if (!PVR_HAS_ERN(dev_info, 38748)) {
         /* Hardware needs space for one extra shared allocation. */
         num_allocs += 1;
      }

      max_common_size =
         dev_runtime_info->reserved_shared_size - dev_runtime_info->max_coeffs;

      /* Double resource requirements to deal with fragmentation. */
      max_common_size /= num_allocs * 2;
      max_common_size =
         ROUND_DOWN_TO(max_common_size,
                       PVRX(TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE));

      return max_common_size;
   } else if (fs_common_size == 0) {
      return max_tiles_in_flight;
   }

   pvr_finishme("Unimplemented path!!");

   return 0;
}

struct pvr_descriptor_limits {
   uint32_t max_per_stage_resources;
   uint32_t max_per_stage_samplers;
   uint32_t max_per_stage_uniform_buffers;
   uint32_t max_per_stage_storage_buffers;
   uint32_t max_per_stage_sampled_images;
   uint32_t max_per_stage_storage_images;
   uint32_t max_per_stage_input_attachments;
};

static const struct pvr_descriptor_limits *
pvr_get_physical_device_descriptor_limits(struct pvr_physical_device *pdevice)
{
   enum pvr_descriptor_cs_level {
      /* clang-format off */
      CS4096, /* 6XT and some XE cores with large CS. */
      CS2560, /* Mid range Rogue XE cores. */
      CS2048, /* Low end Rogue XE cores. */
      CS1536, /* Ultra-low-end 9XEP. */
      CS680,  /* Lower limits for older devices. */
      CS408,  /* 7XE. */
      /* clang-format on */
   };

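   /* Each row lists, in struct pvr_descriptor_limits field order: resources,
    * samplers, uniform buffers, storage buffers, sampled images, storage
    * images and input attachments per stage.
    */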
   static const struct pvr_descriptor_limits descriptor_limits[] = {
      [CS4096] = { 1160U, 256U, 192U, 144U, 256U, 256U, 8U, },
      [CS2560] = {  648U, 128U, 128U, 128U, 128U, 128U, 8U, },
      [CS2048] = {  584U, 128U,  96U,  64U, 128U, 128U, 8U, },
      [CS1536] = {  456U,  64U,  96U,  64U, 128U,  64U, 8U, },
      [CS680]  = {  224U,  32U,  64U,  36U,  48U,   8U, 8U, },
      [CS408]  = {  128U,  16U,  40U,  28U,  16U,   8U, 8U, },
   };

   const uint32_t common_size =
      pvr_calc_fscommon_size_and_tiles_in_flight(pdevice, -1, 1);
   enum pvr_descriptor_cs_level cs_level;

   if (common_size >= 2048) {
      cs_level = CS2048;
   } else if (common_size >= 1536) {
      cs_level = CS1536;
   } else if (common_size >= 680) {
      cs_level = CS680;
   } else if (common_size >= 408) {
      cs_level = CS408;
   } else {
      mesa_loge("This core appears to have a very limited amount of shared "
                "register space and may not meet the Vulkan spec limits.");
      abort();
   }

   return &descriptor_limits[cs_level];
}

void pvr_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                      VkPhysicalDeviceProperties2 *pProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   const struct pvr_descriptor_limits *descriptor_limits =
      pvr_get_physical_device_descriptor_limits(pdevice);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_multisample =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, max_multisample, 4);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_banks =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_banks, 2);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t uvs_pba_entries =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, uvs_pba_entries, 160);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t num_user_clip_planes =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, num_user_clip_planes, 8);

   const uint32_t sub_pixel_precision =
      PVR_HAS_FEATURE(&pdevice->dev_info, simple_internal_parameter_format)
         ? 4U
         : 8U;

   const uint32_t max_render_size =
      rogue_get_render_size_max(&pdevice->dev_info);

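   /* Convert the maximum sample count into a mask of all supported
    * VkSampleCountFlagBits, e.g. a maximum of 4 samples yields
    * (4 << 1) - 1 = 0x7, i.e. 1, 2 and 4 samples.
    */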
   const uint32_t max_sample_bits = ((max_multisample << 1) - 1);

   const uint32_t max_user_vertex_components =
      ((uvs_banks <= 8U) && (uvs_pba_entries == 160U)) ? 64U : 128U;

   /* The workgroup invocations are limited by the case where we have a
    * compute barrier - each slot has a fixed number of invocations, so the
    * whole workgroup may need to span multiple slots. As each slot will WAIT
    * at the barrier until the last invocation completes, all have to be
    * schedulable at the same time.
    *
    * Typically all Rogue cores have 16 slots. Some of the smallest cores are
    * reduced to 14.
    *
    * The compute barrier slot exhaustion scenario can be tested with:
    * dEQP-VK.memory_model.message_passing*u32.coherent.fence_fence
    * .atomicwrite*guard*comp
    */

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t usc_slots =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info, usc_slots, 14);

   /* Default value based on the minimum value found in all existing cores. */
   const uint32_t max_instances_per_pds_task =
      PVR_GET_FEATURE_VALUE(&pdevice->dev_info,
                            max_instances_per_pds_task,
                            32U);

   const uint32_t max_compute_work_group_invocations =
      (usc_slots * max_instances_per_pds_task >= 512U) ? 512U : 384U;
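   /* E.g. a core with 16 slots and 32 instances per PDS task can schedule
    * 16 * 32 = 512 invocations at once, whereas a 14-slot core tops out at
    * 14 * 32 = 448 and so reports 384.
    */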

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = max_render_size,
      .maxImageDimension2D = max_render_size,
      .maxImageDimension3D = 2U * 1024U,
      .maxImageDimensionCube = max_render_size,
      .maxImageArrayLayers = 2U * 1024U,
      .maxTexelBufferElements = 64U * 1024U,
      .maxUniformBufferRange = 128U * 1024U * 1024U,
      .maxStorageBufferRange = 128U * 1024U * 1024U,
      .maxPushConstantsSize = PVR_MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = UINT32_MAX,
      .bufferImageGranularity = 1U,
      .sparseAddressSpaceSize = 256ULL * 1024ULL * 1024ULL * 1024ULL,

      /* Maximum number of descriptor sets that can be bound at the same
       * time.
       */
      .maxBoundDescriptorSets = PVR_MAX_DESCRIPTOR_SETS,

      .maxPerStageResources = descriptor_limits->max_per_stage_resources,
      .maxPerStageDescriptorSamplers =
         descriptor_limits->max_per_stage_samplers,
      .maxPerStageDescriptorUniformBuffers =
         descriptor_limits->max_per_stage_uniform_buffers,
      .maxPerStageDescriptorStorageBuffers =
         descriptor_limits->max_per_stage_storage_buffers,
      .maxPerStageDescriptorSampledImages =
         descriptor_limits->max_per_stage_sampled_images,
      .maxPerStageDescriptorStorageImages =
         descriptor_limits->max_per_stage_storage_images,
      .maxPerStageDescriptorInputAttachments =
         descriptor_limits->max_per_stage_input_attachments,

      .maxDescriptorSetSamplers = 256U,
      .maxDescriptorSetUniformBuffers = 256U,
      .maxDescriptorSetUniformBuffersDynamic = 8U,
      .maxDescriptorSetStorageBuffers = 256U,
      .maxDescriptorSetStorageBuffersDynamic = 8U,
      .maxDescriptorSetSampledImages = 256U,
      .maxDescriptorSetStorageImages = 256U,
      .maxDescriptorSetInputAttachments = 256U,

      /* Vertex Shader Limits */
      .maxVertexInputAttributes = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputBindings = PVR_MAX_VERTEX_INPUT_BINDINGS,
      .maxVertexInputAttributeOffset = 0xFFFF,
      .maxVertexInputBindingStride = 1024U * 1024U * 1024U * 2U,
      .maxVertexOutputComponents = max_user_vertex_components,

      /* Tessellation Limits */
      .maxTessellationGenerationLevel = 0,
      .maxTessellationPatchSize = 0,
      .maxTessellationControlPerVertexInputComponents = 0,
      .maxTessellationControlPerVertexOutputComponents = 0,
      .maxTessellationControlPerPatchOutputComponents = 0,
      .maxTessellationControlTotalOutputComponents = 0,
      .maxTessellationEvaluationInputComponents = 0,
      .maxTessellationEvaluationOutputComponents = 0,

      /* Geometry Shader Limits */
      .maxGeometryShaderInvocations = 0,
      .maxGeometryInputComponents = 0,
      .maxGeometryOutputComponents = 0,
      .maxGeometryOutputVertices = 0,
      .maxGeometryTotalOutputComponents = 0,

      /* Fragment Shader Limits */
      .maxFragmentInputComponents = max_user_vertex_components,
      .maxFragmentOutputAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .maxFragmentDualSrcAttachments = 0,
      .maxFragmentCombinedOutputResources =
         descriptor_limits->max_per_stage_storage_buffers +
         descriptor_limits->max_per_stage_storage_images +
         PVR_MAX_COLOR_ATTACHMENTS,

      /* Compute Shader Limits */
      .maxComputeSharedMemorySize = 16U * 1024U,
      .maxComputeWorkGroupCount = { 64U * 1024U, 64U * 1024U, 64U * 1024U },
      .maxComputeWorkGroupInvocations = max_compute_work_group_invocations,
      .maxComputeWorkGroupSize = { max_compute_work_group_invocations,
                                   max_compute_work_group_invocations,
                                   64U },

      /* Rasterization Limits */
      .subPixelPrecisionBits = sub_pixel_precision,
      .subTexelPrecisionBits = 8U,
      .mipmapPrecisionBits = 8U,

      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = 2U * 1024U * 1024U * 1024U,
      .maxSamplerLodBias = 16.0f,
      .maxSamplerAnisotropy = 1.0f,
      .maxViewports = PVR_MAX_VIEWPORTS,

      .maxViewportDimensions[0] = max_render_size,
      .maxViewportDimensions[1] = max_render_size,
      .viewportBoundsRange[0] = -(int32_t)(2U * max_render_size),
      .viewportBoundsRange[1] = 2U * max_render_size,

      .viewportSubPixelBits = 0,
      .minMemoryMapAlignment = 64U,
      .minTexelBufferOffsetAlignment = 16U,
      .minUniformBufferOffsetAlignment = 4U,
      .minStorageBufferOffsetAlignment = 4U,

      .minTexelOffset = -8,
      .maxTexelOffset = 7U,
      .minTexelGatherOffset = -8,
      .maxTexelGatherOffset = 7,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.5,
      .subPixelInterpolationOffsetBits = 4U,

      .maxFramebufferWidth = max_render_size,
      .maxFramebufferHeight = max_render_size,
      .maxFramebufferLayers = PVR_MAX_FRAMEBUFFER_LAYERS,

      .framebufferColorSampleCounts = max_sample_bits,
      .framebufferDepthSampleCounts = max_sample_bits,
      .framebufferStencilSampleCounts = max_sample_bits,
      .framebufferNoAttachmentsSampleCounts = max_sample_bits,
      .maxColorAttachments = PVR_MAX_COLOR_ATTACHMENTS,
      .sampledImageColorSampleCounts = max_sample_bits,
      .sampledImageIntegerSampleCounts = max_sample_bits,
      .sampledImageDepthSampleCounts = max_sample_bits,
      .sampledImageStencilSampleCounts = max_sample_bits,
      .storageImageSampleCounts = max_sample_bits,
      .maxSampleMaskWords = 1U,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = 0.0f,
      .maxClipDistances = num_user_clip_planes,
      .maxCullDistances = num_user_clip_planes,
      .maxCombinedClipAndCullDistances = num_user_clip_planes,
      .discreteQueuePriorities = 2U,
      .pointSizeRange[0] = 1.0f,
      .pointSizeRange[1] = 511.0f,
      .pointSizeGranularity = 0.0625f,
      .lineWidthRange[0] = 1.0f / 16.0f,
      .lineWidthRange[1] = 16.0f,
      .lineWidthGranularity = 1.0f / 16.0f,
      .strictLines = false,
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 4U,
      .optimalBufferCopyRowPitchAlignment = 4U,
      .nonCoherentAtomSize = 1U,
   };

   pProperties->properties = (VkPhysicalDeviceProperties){
      .apiVersion = PVR_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = VK_VENDOR_ID_IMAGINATION,
      .deviceID = pdevice->dev_info.ident.device_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = { 0 },
   };

   snprintf(pProperties->properties.deviceName,
            sizeof(pProperties->properties.deviceName),
            "%s",
            pdevice->name);

   memcpy(pProperties->properties.pipelineCacheUUID,
          pdevice->pipeline_cache_uuid,
          VK_UUID_SIZE);

   vk_foreach_struct (ext, pProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

static const VkQueueFamilyProperties pvr_queue_family_properties = {
   .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_GRAPHICS_BIT |
                 VK_QUEUE_TRANSFER_BIT,
   .queueCount = PVR_MAX_QUEUES,
   .timestampValidBits = 0,
   .minImageTransferGranularity = { 1, 1, 1 },
};

void pvr_GetPhysicalDeviceQueueFamilyProperties(
   VkPhysicalDevice physicalDevice,
   uint32_t *pCount,
   VkQueueFamilyProperties *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties,
                          out,
                          pQueueFamilyProperties,
                          pCount);

   vk_outarray_append_typed (VkQueueFamilyProperties, &out, p) {
      *p = pvr_queue_family_properties;
   }
}

void pvr_GetPhysicalDeviceQueueFamilyProperties2(
   VkPhysicalDevice physicalDevice,
   uint32_t *pQueueFamilyPropertyCount,
   VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   VK_OUTARRAY_MAKE_TYPED(VkQueueFamilyProperties2,
                          out,
                          pQueueFamilyProperties,
                          pQueueFamilyPropertyCount);

   vk_outarray_append_typed (VkQueueFamilyProperties2, &out, p) {
      p->queueFamilyProperties = pvr_queue_family_properties;

      vk_foreach_struct (ext, p->pNext) {
         pvr_debug_ignored_stype(ext->sType);
      }
   }
}

void pvr_GetPhysicalDeviceMemoryProperties2(
   VkPhysicalDevice physicalDevice,
   VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);

   pMemoryProperties->memoryProperties = pdevice->memory;

   vk_foreach_struct (ext, pMemoryProperties->pNext) {
      pvr_debug_ignored_stype(ext->sType);
   }
}

PFN_vkVoidFunction pvr_GetInstanceProcAddr(VkInstance _instance,
                                           const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_proc_addr(&instance->vk,
                                    &pvr_instance_entrypoints,
                                    pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return pvr_GetInstanceProcAddr(instance, pName);
}

/* With version 4+ of the loader interface the ICD should expose
 * vk_icdGetPhysicalDeviceProcAddr().
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
   PVR_FROM_HANDLE(pvr_instance, instance, _instance);
   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

static VkResult pvr_device_init_compute_fence_program(struct pvr_device *device)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);
   struct pvr_pds_compute_shader_program program = { 0U };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   uint32_t *data_buffer;
   uint32_t *code_buffer;
   VkResult result;

   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.work_group_input_regs));
   STATIC_ASSERT(ARRAY_SIZE(program.local_input_regs) ==
                 ARRAY_SIZE(program.global_input_regs));

   /* Initialize PDS structure. */
   for (uint32_t i = 0U; i < ARRAY_SIZE(program.local_input_regs); i++) {
      program.local_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.work_group_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
      program.global_input_regs[i] = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;
   }

   program.barrier_coefficient = PVR_PDS_COMPUTE_INPUT_REG_UNUSED;

   /* Fence kernel. */
   program.fence = true;
   program.clear_pds_barrier = true;

   /* Calculate how much space we'll need for the compute shader PDS program.
    */
   pvr_pds_set_sizes_compute_shader(&program, dev_info);

   /* FIXME: Fix the below inconsistency of code size being in bytes whereas
    * data size being in dwords.
    */
   /* Code size is in bytes, data size in dwords. */
   staging_buffer_size =
      program.data_size * sizeof(uint32_t) + program.code_size;

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   data_buffer = staging_buffer;
   code_buffer = pvr_pds_generate_compute_shader_data_segment(&program,
                                                              data_buffer,
                                                              dev_info);
   pvr_pds_generate_compute_shader_code_segment(&program,
                                                code_buffer,
                                                dev_info);

   result = pvr_gpu_upload_pds(device,
                               data_buffer,
                               program.data_size,
                               PVRX(CDMCTRL_KERNEL1_DATA_ADDR_ALIGNMENT),
                               code_buffer,
                               program.code_size / sizeof(uint32_t),
                               PVRX(CDMCTRL_KERNEL2_CODE_ADDR_ALIGNMENT),
                               cache_line_size,
                               &device->pds_compute_fence_program);

   vk_free(&device->vk.alloc, staging_buffer);

   return result;
}

static VkResult pvr_pds_idfwdf_programs_create_and_upload(
   struct pvr_device *device,
   pvr_dev_addr_t usc_addr,
   uint32_t shareds,
   uint32_t temps,
   pvr_dev_addr_t shareds_buffer_addr,
   struct pvr_pds_upload *const upload_out,
   struct pvr_pds_upload *const sw_compute_barrier_upload_out)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   struct pvr_pds_vertex_shader_sa_program program = {
      .kick_usc = true,
      .clear_pds_barrier = PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info),
   };
   size_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   /* We'll need to DMA the shareds into the USC's Common Store. */
   program.num_dma_kicks = pvr_pds_encode_dma_burst(program.dma_control,
                                                    program.dma_address,
                                                    0,
                                                    shareds,
                                                    shareds_buffer_addr.addr,
                                                    dev_info);

   /* DMA temp regs. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       usc_addr.addr,
                       temps,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

   staging_buffer_size =
      (program.code_size + program.data_size) * sizeof(*staging_buffer);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
   pvr_pds_vertex_shader_sa(&program,
                            staging_buffer,
                            PDS_GENERATE_DATA_SEGMENT,
                            dev_info);
   pvr_pds_vertex_shader_sa(&program,
                            &staging_buffer[program.data_size],
                            PDS_GENERATE_CODE_SEGMENT,
                            dev_info);

   /* At the time of writing, the SW_COMPUTE_PDS_BARRIER variant of the
    * program is bigger, so we handle it first (if needed) and realloc() to a
    * smaller size afterwards.
    */
   if (PVR_NEED_SW_COMPUTE_PDS_BARRIER(dev_info)) {
      /* FIXME: Figure out the define for alignment of 16. */
      result = pvr_gpu_upload_pds(device,
                                  &staging_buffer[0],
                                  program.data_size,
                                  16,
                                  &staging_buffer[program.data_size],
                                  program.code_size,
                                  16,
                                  16,
                                  sw_compute_barrier_upload_out);
      if (result != VK_SUCCESS) {
         vk_free(&device->vk.alloc, staging_buffer);
         return result;
      }

      program.clear_pds_barrier = false;

      pvr_pds_vertex_shader_sa(&program, NULL, PDS_GENERATE_SIZES, dev_info);

      staging_buffer_size =
         (program.code_size + program.data_size) * sizeof(*staging_buffer);

      staging_buffer = vk_realloc(&device->vk.alloc,
                                  staging_buffer,
                                  staging_buffer_size,
                                  8,
                                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!staging_buffer) {
         pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);

         return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      /* FIXME: Add support for PDS_GENERATE_CODEDATA_SEGMENTS? */
      pvr_pds_vertex_shader_sa(&program,
                               staging_buffer,
                               PDS_GENERATE_DATA_SEGMENT,
                               dev_info);
      pvr_pds_vertex_shader_sa(&program,
                               &staging_buffer[program.data_size],
                               PDS_GENERATE_CODE_SEGMENT,
                               dev_info);
   } else {
      *sw_compute_barrier_upload_out = (struct pvr_pds_upload){
         .pvr_bo = NULL,
      };
   }

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               &staging_buffer[0],
                               program.data_size,
                               16,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16,
                               16,
                               upload_out);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, staging_buffer);
      pvr_bo_free(device, sw_compute_barrier_upload_out->pvr_bo);

      return result;
   }

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;
}

static VkResult pvr_device_init_compute_idfwdf_state(struct pvr_device *device)
{
   uint64_t sampler_state[ROGUE_NUM_TEXSTATE_SAMPLER_WORDS];
   uint64_t image_state[ROGUE_NUM_TEXSTATE_IMAGE_WORDS];
   const struct rogue_shader_binary *usc_program;
   struct pvr_texture_state_info tex_info;
   uint32_t *dword_ptr;
   uint32_t usc_shareds;
   uint32_t usc_temps;
   VkResult result;

   pvr_hard_code_get_idfwdf_program(&device->pdevice->dev_info,
                                    &usc_program,
                                    &usc_shareds,
                                    &usc_temps);

   device->idfwdf_state.usc_shareds = usc_shareds;

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_usc(device,
                               usc_program->data,
                               usc_program->size,
                               16,
                               &device->idfwdf_state.usc);
   if (result != VK_SUCCESS)
      return result;

   /* TODO: Get the store buffer size from the compiler? */
   /* TODO: How was the size derived here? */
   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         4 * sizeof(float) * 4 * 2,
                         4,
                         0,
                         &device->idfwdf_state.store_bo);
   if (result != VK_SUCCESS)
      goto err_free_usc_program;

   result = pvr_bo_alloc(device,
                         device->heaps.general_heap,
                         usc_shareds * ROGUE_REG_SIZE_BYTES,
                         ROGUE_REG_SIZE_BYTES,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &device->idfwdf_state.shareds_bo);
   if (result != VK_SUCCESS)
      goto err_free_store_buffer;

   /* Pack state words. */

   pvr_csb_pack (&sampler_state[0], TEXSTATE_SAMPLER, sampler) {
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   }

   /* clang-format off */
   pvr_csb_pack (&sampler_state[1], TEXSTATE_SAMPLER_WORD1, sampler_word1) {}
   /* clang-format on */

   STATIC_ASSERT(1 + 1 == ROGUE_NUM_TEXSTATE_SAMPLER_WORDS);

   tex_info = (struct pvr_texture_state_info){
      .format = VK_FORMAT_R32G32B32A32_SFLOAT,
      .mem_layout = PVR_MEMLAYOUT_LINEAR,
      .flags = PVR_TEXFLAGS_INDEX_LOOKUP,
      /* TODO: Is this correct? Is it 2D, 3D, or 2D_ARRAY? */
      .type = VK_IMAGE_VIEW_TYPE_2D,
      .extent = { .width = 4, .height = 2, .depth = 0 },
      .mip_levels = 1,
      .sample_count = 1,
      .stride = 4,
      .swizzle = { PIPE_SWIZZLE_X,
                   PIPE_SWIZZLE_Y,
                   PIPE_SWIZZLE_Z,
                   PIPE_SWIZZLE_W },
      .addr = device->idfwdf_state.store_bo->vma->dev_addr,
   };

   result = pvr_pack_tex_state(device, &tex_info, image_state);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   /* Fill the shareds buffer. */

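   /* Layout of the 12 shared registers, in dwords: [0-1] store buffer device
    * address (high then low word), [2-3] padding for 128-bit alignment,
    * [4-7] image state words, [8-11] sampler state words.
    */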
   dword_ptr = (uint32_t *)device->idfwdf_state.shareds_bo->bo->map;

#define HIGH_32(val) ((uint32_t)((val) >> 32U))
#define LOW_32(val) ((uint32_t)(val))

   /* TODO: Should we use compiler info to set up the shareds data instead of
    * assuming there are always 12 and this is how they should be set up?
    */

   dword_ptr[0] = HIGH_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);
   dword_ptr[1] = LOW_32(device->idfwdf_state.store_bo->vma->dev_addr.addr);

   /* Pad the shareds as the texture/sample state words are 128 bit aligned. */
   dword_ptr[2] = 0U;
   dword_ptr[3] = 0U;

   dword_ptr[4] = LOW_32(image_state[0]);
   dword_ptr[5] = HIGH_32(image_state[0]);
   dword_ptr[6] = LOW_32(image_state[1]);
   dword_ptr[7] = HIGH_32(image_state[1]);

   dword_ptr[8] = LOW_32(sampler_state[0]);
   dword_ptr[9] = HIGH_32(sampler_state[0]);
   dword_ptr[10] = LOW_32(sampler_state[1]);
   dword_ptr[11] = HIGH_32(sampler_state[1]);
   assert(11 + 1 == usc_shareds);

#undef HIGH_32
#undef LOW_32

   pvr_bo_cpu_unmap(device, device->idfwdf_state.shareds_bo);
   dword_ptr = NULL;

   /* Generate and upload PDS programs. */
   result = pvr_pds_idfwdf_programs_create_and_upload(
      device,
      device->idfwdf_state.usc->vma->dev_addr,
      usc_shareds,
      usc_temps,
      device->idfwdf_state.shareds_bo->vma->dev_addr,
      &device->idfwdf_state.pds,
      &device->idfwdf_state.sw_compute_barrier_pds);
   if (result != VK_SUCCESS)
      goto err_free_shareds_buffer;

   return VK_SUCCESS;

err_free_shareds_buffer:
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);

err_free_store_buffer:
   pvr_bo_free(device, device->idfwdf_state.store_bo);

err_free_usc_program:
   pvr_bo_free(device, device->idfwdf_state.usc);

   return result;
}

static void pvr_device_finish_compute_idfwdf_state(struct pvr_device *device)
{
   pvr_bo_free(device, device->idfwdf_state.pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.sw_compute_barrier_pds.pvr_bo);
   pvr_bo_free(device, device->idfwdf_state.shareds_bo);
   pvr_bo_free(device, device->idfwdf_state.store_bo);
   pvr_bo_free(device, device->idfwdf_state.usc);
}

/* FIXME: We should be calculating the size when we upload the code in
 * pvr_srv_setup_static_pixel_event_program().
 */
static void pvr_device_get_pixel_event_pds_program_data_size(
   const struct pvr_device_info *dev_info,
   uint32_t *const data_size_in_dwords_out)
{
   struct pvr_pds_event_program program = {
      /* No data to DMA, just a DOUTU needed. */
      .num_emit_word_pairs = 0,
   };

   pvr_pds_set_sizes_pixel_event(&program, dev_info);

   *data_size_in_dwords_out = program.data_size;
}

static VkResult pvr_device_init_nop_program(struct pvr_device *device)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   struct pvr_pds_kickusc_program program = { 0 };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   result = pvr_gpu_upload_usc(device,
                               pvr_nop_usc_code,
                               sizeof(pvr_nop_usc_code),
                               cache_line_size,
                               &device->nop_program.usc);
   if (result != VK_SUCCESS)
      return result;

   /* Setup a PDS program that kicks the static USC program. */
   pvr_pds_setup_doutu(&program.usc_task_control,
                       device->nop_program.usc->vma->dev_addr.addr,
                       0U,
                       PVRX(PDSINST_DOUTU_SAMPLE_RATE_INSTANCE),
                       false);

   pvr_pds_set_sizes_pixel_shader(&program);

   staging_buffer_size =
      (program.code_size + program.data_size) * sizeof(*staging_buffer);

   staging_buffer = vk_alloc(&device->vk.alloc,
                             staging_buffer_size,
                             8U,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_nop_usc_bo;
   }

   pvr_pds_generate_pixel_shader_program(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               staging_buffer,
                               program.data_size,
                               16U,
                               &staging_buffer[program.data_size],
                               program.code_size,
                               16U,
                               16U,
                               &device->nop_program.pds);
   if (result != VK_SUCCESS)
      goto err_free_staging_buffer;

   vk_free(&device->vk.alloc, staging_buffer);

   return VK_SUCCESS;

err_free_staging_buffer:
   vk_free(&device->vk.alloc, staging_buffer);

err_free_nop_usc_bo:
   pvr_bo_free(device, device->nop_program.usc);

   return result;
}

static void pvr_device_init_default_sampler_state(struct pvr_device *device)
{
   pvr_csb_pack (&device->input_attachment_sampler, TEXSTATE_SAMPLER, sampler) {
      sampler.addrmode_u = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_v = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.addrmode_w = PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
      sampler.dadjust = PVRX(TEXSTATE_DADJUST_ZERO_UINT);
      sampler.magfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.minfilter = PVRX(TEXSTATE_FILTER_POINT);
      sampler.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);
      sampler.non_normalized_coords = true;
   }
}
1562
VkResult pvr_CreateDevice(VkPhysicalDevice physicalDevice,
                          const VkDeviceCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkDevice *pDevice)
{
   PVR_FROM_HANDLE(pvr_physical_device, pdevice, physicalDevice);
   struct pvr_instance *instance = pdevice->instance;
   struct vk_device_dispatch_table dispatch_table;
   struct pvr_device *device;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   device = vk_alloc2(&pdevice->vk.instance->alloc,
                      pAllocator,
                      sizeof(*device),
                      8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &pvr_device_entrypoints,
                                             true);

   vk_device_dispatch_table_from_entrypoints(&dispatch_table,
                                             &wsi_device_entrypoints,
                                             false);

   result = vk_device_init(&device->vk,
                           &pdevice->vk,
                           &dispatch_table,
                           pCreateInfo,
                           pAllocator);
   if (result != VK_SUCCESS)
      goto err_free_device;

   device->render_fd = open(pdevice->render_path, O_RDWR | O_CLOEXEC);
   if (device->render_fd < 0) {
      result = vk_errorf(instance,
                         VK_ERROR_INITIALIZATION_FAILED,
                         "Failed to open device %s",
                         pdevice->render_path);
      goto err_vk_device_finish;
   }

   if (pdevice->master_path)
      device->master_fd = open(pdevice->master_path, O_RDWR | O_CLOEXEC);
   else
      device->master_fd = -1;

   vk_device_set_drm_fd(&device->vk, device->render_fd);

   device->instance = instance;
   device->pdevice = pdevice;

   device->ws = pvr_winsys_create(device->master_fd,
                                  device->render_fd,
                                  &device->vk.alloc);
   if (!device->ws) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto err_close_master_fd;
   }

   device->ws->ops->get_heaps_info(device->ws, &device->heaps);

   result = pvr_free_list_create(device,
                                 PVR_GLOBAL_FREE_LIST_INITIAL_SIZE,
                                 PVR_GLOBAL_FREE_LIST_MAX_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_SIZE,
                                 PVR_GLOBAL_FREE_LIST_GROW_THRESHOLD,
                                 NULL /* parent_free_list */,
                                 &device->global_free_list);
   if (result != VK_SUCCESS)
      goto err_pvr_winsys_destroy;

   result = pvr_device_init_nop_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_list_destroy;

   result = pvr_device_init_compute_fence_program(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_nop_program;

   result = pvr_device_init_compute_idfwdf_state(device);
   if (result != VK_SUCCESS)
      goto err_pvr_free_compute_fence;

   result = pvr_queues_create(device, pCreateInfo);
   if (result != VK_SUCCESS)
      goto err_pvr_finish_compute_idfwdf;

   pvr_device_init_default_sampler_state(device);

   if (pCreateInfo->pEnabledFeatures)
      memcpy(&device->features,
             pCreateInfo->pEnabledFeatures,
             sizeof(device->features));

   /* FIXME: Move this to a later stage and possibly somewhere other than
    * pvr_device. The purpose of this is so that we don't have to get the size
    * on each kick.
    */
   pvr_device_get_pixel_event_pds_program_data_size(
      &pdevice->dev_info,
      &device->pixel_event_data_size_in_dwords);

   device->global_queue_job_count = 0;
   device->global_queue_present_count = 0;

   *pDevice = pvr_device_to_handle(device);

   return VK_SUCCESS;

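   /* The labels below unwind initialization in reverse order: a failed step
    * jumps to the label that tears down everything set up before it, then
    * falls through the remaining labels.
    */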
err_pvr_finish_compute_idfwdf:
   pvr_device_finish_compute_idfwdf_state(device);

err_pvr_free_compute_fence:
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);

err_pvr_free_nop_program:
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);

err_pvr_free_list_destroy:
   pvr_free_list_destroy(device->global_free_list);

err_pvr_winsys_destroy:
   pvr_winsys_destroy(device->ws);

err_close_master_fd:
   if (device->master_fd >= 0)
      close(device->master_fd);

   close(device->render_fd);

err_vk_device_finish:
   vk_device_finish(&device->vk);

err_free_device:
   vk_free(&device->vk.alloc, device);

   return result;
}

void pvr_DestroyDevice(VkDevice _device,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   pvr_queues_destroy(device);
   pvr_device_finish_compute_idfwdf_state(device);
   pvr_bo_free(device, device->pds_compute_fence_program.pvr_bo);
   pvr_bo_free(device, device->nop_program.pds.pvr_bo);
   pvr_bo_free(device, device->nop_program.usc);
   pvr_free_list_destroy(device->global_free_list);
   pvr_winsys_destroy(device->ws);

   /* pvr_CreateDevice() may also have opened a master fd; close it too so
    * it doesn't leak.
    */
   if (device->master_fd >= 0)
      close(device->master_fd);

   close(device->render_fd);
   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult pvr_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount,
                                              VkLayerProperties *pProperties)
{
   if (!pProperties) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult pvr_AllocateMemory(VkDevice _device,
                            const VkMemoryAllocateInfo *pAllocateInfo,
                            const VkAllocationCallbacks *pAllocator,
                            VkDeviceMemory *pMem)
{
   const VkImportMemoryFdInfoKHR *fd_info = NULL;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   enum pvr_winsys_bo_type type = PVR_WINSYS_BO_TYPE_GPU;
   struct pvr_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   assert(pAllocateInfo->allocationSize > 0);

   mem = vk_object_alloc(&device->vk,
                         pAllocator,
                         sizeof(*mem),
                         VK_OBJECT_TYPE_DEVICE_MEMORY);
   if (!mem)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_foreach_struct_const (ext, pAllocateInfo->pNext) {
      switch ((unsigned)ext->sType) {
      case VK_STRUCTURE_TYPE_WSI_MEMORY_ALLOCATE_INFO_MESA:
         type = PVR_WINSYS_BO_TYPE_DISPLAY;
         break;
      case VK_STRUCTURE_TYPE_IMPORT_MEMORY_FD_INFO_KHR:
         fd_info = (void *)ext;
         break;
      default:
         pvr_debug_ignored_stype(ext->sType);
         break;
      }
   }

   if (fd_info && fd_info->handleType) {
      VkDeviceSize aligned_alloc_size =
         ALIGN_POT(pAllocateInfo->allocationSize, device->ws->page_size);

      assert(
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
         fd_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

      result = device->ws->ops->buffer_create_from_fd(device->ws,
                                                      fd_info->fd,
                                                      &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;

      /* For security purposes, we reject importing the bo if it's smaller
       * than the requested allocation size. This prevents a malicious client
       * from passing a buffer to a trusted client, lying about the size, and
       * telling the trusted client to try to texture from an image that goes
       * out-of-bounds. This sort of thing could lead to GPU hangs or worse
       * in the trusted client. The trusted client can protect itself against
       * this sort of attack but only if it can trust the buffer size.
       */
      if (aligned_alloc_size > mem->bo->size) {
         result = vk_errorf(device,
                            VK_ERROR_INVALID_EXTERNAL_HANDLE,
                            "Aligned requested size too large for the given fd "
                            "%" PRIu64 "B > %" PRIu64 "B",
                            aligned_alloc_size,
                            mem->bo->size);
         device->ws->ops->buffer_destroy(mem->bo);
         goto err_vk_object_free_mem;
      }

      /* From the Vulkan spec:
       *
       *    "Importing memory from a file descriptor transfers ownership of
       *    the file descriptor from the application to the Vulkan
       *    implementation. The application must not perform any operations
       *    on the file descriptor after a successful import."
       *
       * If the import fails, we leave the file descriptor open.
       */
      close(fd_info->fd);
   } else {
      /* Align physical allocations to the page size of the heap that will be
       * used when binding device memory (see pvr_bind_memory()) to ensure the
       * entire allocation can be mapped.
       */
      const uint64_t alignment = device->heaps.general_heap->page_size;

      /* FIXME: Need to determine the flags based on
       * device->pdevice->memory.memoryTypes[pAllocateInfo->memoryTypeIndex].propertyFlags.
       *
       * The alternative would be to store the flags alongside the memory
       * types as an array that's indexed by pAllocateInfo->memoryTypeIndex so
       * that they can be looked up.
       */
      result = device->ws->ops->buffer_create(device->ws,
                                              pAllocateInfo->allocationSize,
                                              alignment,
                                              type,
                                              PVR_WINSYS_BO_FLAG_CPU_ACCESS,
                                              &mem->bo);
      if (result != VK_SUCCESS)
         goto err_vk_object_free_mem;
   }

   *pMem = pvr_device_memory_to_handle(mem);

   return VK_SUCCESS;

err_vk_object_free_mem:
   vk_object_free(&device->vk, pAllocator, mem);

   return result;
}

VkResult pvr_GetMemoryFdKHR(VkDevice _device,
                            const VkMemoryGetFdInfoKHR *pGetFdInfo,
                            int *pFd)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   assert(
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
      pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   return device->ws->ops->buffer_get_fd(mem->bo, pFd);
}

VkResult
pvr_GetMemoryFdPropertiesKHR(VkDevice _device,
                             VkExternalMemoryHandleTypeFlagBits handleType,
                             int fd,
                             VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT:
      /* FIXME: This should only allow memory types that have the
       * VK_MEMORY_PROPERTY_HOST_CACHED_BIT flag set, as dma-bufs should be
       * imported using cacheable memory types, given that the exporter's
       * mmap will always map them as cacheable.
       * Ref:
       * https://www.kernel.org/doc/html/latest/driver-api/dma-buf.html#c.dma_buf_ops
       */
      pMemoryFdProperties->memoryTypeBits =
         (1 << device->pdevice->memory.memoryTypeCount) - 1;
      return VK_SUCCESS;
   default:
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}

void pvr_FreeMemory(VkDevice _device,
                    VkDeviceMemory _mem,
                    const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _mem);

   if (!mem)
      return;

   device->ws->ops->buffer_destroy(mem->bo);

   vk_object_free(&device->vk, pAllocator, mem);
}

VkResult pvr_MapMemory(VkDevice _device,
                       VkDeviceMemory _memory,
                       VkDeviceSize offset,
                       VkDeviceSize size,
                       VkMemoryMapFlags flags,
                       void **ppData)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);
   void *map;

   if (!mem) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo->size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
   assert(size > 0);
   assert(offset + size <= mem->bo->size);

   /* Check if already mapped */
   if (mem->bo->map) {
      *ppData = mem->bo->map + offset;
      return VK_SUCCESS;
   }

   /* Map it all at once */
   map = device->ws->ops->buffer_map(mem->bo);
   if (!map)
      return vk_error(device, VK_ERROR_MEMORY_MAP_FAILED);

   *ppData = map + offset;

   return VK_SUCCESS;
}

void pvr_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_device_memory, mem, _memory);

   if (!mem || !mem->bo->map)
      return;

   device->ws->ops->buffer_unmap(mem->bo);
}

VkResult pvr_FlushMappedMemoryRanges(VkDevice _device,
                                     uint32_t memoryRangeCount,
                                     const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
pvr_InvalidateMappedMemoryRanges(VkDevice _device,
                                 uint32_t memoryRangeCount,
                                 const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

void pvr_GetImageSparseMemoryRequirements2(
   VkDevice device,
   const VkImageSparseMemoryRequirementsInfo2 *pInfo,
   uint32_t *pSparseMemoryRequirementCount,
   VkSparseImageMemoryRequirements2 *pSparseMemoryRequirements)
{
   *pSparseMemoryRequirementCount = 0;
}

void pvr_GetDeviceMemoryCommitment(VkDevice device,
                                   VkDeviceMemory memory,
                                   VkDeviceSize *pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult pvr_bind_memory(struct pvr_device *device,
                         struct pvr_device_memory *mem,
                         VkDeviceSize offset,
                         VkDeviceSize size,
                         VkDeviceSize alignment,
                         struct pvr_winsys_vma **const vma_out,
                         pvr_dev_addr_t *const dev_addr_out)
{
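   /* The device-virtual reservation below is padded by the offset's distance
    * into its page, so that a mapping starting on a page boundary can still
    * cover size bytes beginning at offset. E.g. with a 4 KiB page size,
    * offset = 0x1100 and size = 0x2000 give virt_size = 0x2100.
    */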
   VkDeviceSize virt_size =
      size + (offset & (device->heaps.general_heap->page_size - 1));
   struct pvr_winsys_vma *vma;
   pvr_dev_addr_t dev_addr;

   /* Valid usage:
    *
    *    "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetBufferMemoryRequirements with buffer"
    *
    *    "memoryOffset must be an integer multiple of the alignment member of
    *    the VkMemoryRequirements structure returned from a call to
    *    vkGetImageMemoryRequirements with image"
    */
   assert(offset % alignment == 0);
   assert(offset < mem->bo->size);

   vma = device->ws->ops->heap_alloc(device->heaps.general_heap,
                                     virt_size,
                                     alignment);
   if (!vma)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   dev_addr = device->ws->ops->vma_map(vma, mem->bo, offset, size);
   if (!dev_addr.addr) {
      device->ws->ops->heap_free(vma);
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *dev_addr_out = dev_addr;
   *vma_out = vma;

   return VK_SUCCESS;
}

void pvr_unbind_memory(struct pvr_device *device, struct pvr_winsys_vma *vma)
{
   device->ws->ops->vma_unmap(vma);
   device->ws->ops->heap_free(vma);
}

VkResult pvr_BindBufferMemory2(VkDevice _device,
                               uint32_t bindInfoCount,
                               const VkBindBufferMemoryInfo *pBindInfos)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   uint32_t i;

   for (i = 0; i < bindInfoCount; i++) {
      PVR_FROM_HANDLE(pvr_device_memory, mem, pBindInfos[i].memory);
      PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);

      VkResult result = pvr_bind_memory(device,
                                        mem,
                                        pBindInfos[i].memoryOffset,
                                        buffer->vk.size,
                                        buffer->alignment,
                                        &buffer->vma,
                                        &buffer->dev_addr);
      if (result != VK_SUCCESS) {
         /* Unbind everything bound so far before failing. */
         while (i--) {
            PVR_FROM_HANDLE(pvr_buffer, buffer, pBindInfos[i].buffer);
            pvr_unbind_memory(device, buffer->vma);
         }

         return result;
      }
   }

   return VK_SUCCESS;
}

VkResult pvr_QueueBindSparse(VkQueue _queue,
                             uint32_t bindInfoCount,
                             const VkBindSparseInfo *pBindInfo,
                             VkFence fence)
{
   return VK_SUCCESS;
}

/* Event functions. */

VkResult pvr_CreateEvent(VkDevice _device,
                         const VkEventCreateInfo *pCreateInfo,
                         const VkAllocationCallbacks *pAllocator,
                         VkEvent *pEvent)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

void pvr_DestroyEvent(VkDevice _device,
                      VkEvent _event,
                      const VkAllocationCallbacks *pAllocator)
{
   assert(!"Unimplemented");
}

VkResult pvr_GetEventStatus(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_SetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

VkResult pvr_ResetEvent(VkDevice _device, VkEvent _event)
{
   assert(!"Unimplemented");
   return VK_SUCCESS;
}

/* Buffer functions. */

VkResult pvr_CreateBuffer(VkDevice _device,
                          const VkBufferCreateInfo *pCreateInfo,
                          const VkAllocationCallbacks *pAllocator,
                          VkBuffer *pBuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const uint32_t alignment = 4096;
   struct pvr_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   assert(pCreateInfo->usage != 0);

   /* We check against (ULONG_MAX - alignment) so that aligning the size up
    * to the buffer alignment cannot overflow.
    */
   if (pCreateInfo->size >= ULONG_MAX - alignment)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   buffer =
      vk_buffer_create(&device->vk, pCreateInfo, pAllocator, sizeof(*buffer));
   if (!buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->alignment = alignment;

   *pBuffer = pvr_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void pvr_DestroyBuffer(VkDevice _device,
                       VkBuffer _buffer,
                       const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_buffer, buffer, _buffer);

   if (!buffer)
      return;

   if (buffer->vma)
      pvr_unbind_memory(device, buffer->vma);

   vk_buffer_destroy(&device->vk, pAllocator, &buffer->vk);
}

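/* Helper for one-shot uploads of immutable data into a device heap: it
 * allocates a CPU-mapped bo, copies the data in, and unmaps it again. A
 * minimal sketch of a typical call (see pvr_framebuffer_create_ppp_state()
 * below for a real caller; state_words is a hypothetical host array):
 *
 *    struct pvr_bo *bo;
 *    VkResult result = pvr_gpu_upload(device,
 *                                     device->heaps.general_heap,
 *                                     state_words,
 *                                     sizeof(state_words),
 *                                     cache_line_size,
 *                                     &bo);
 */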
VkResult pvr_gpu_upload(struct pvr_device *device,
                        struct pvr_winsys_heap *heap,
                        const void *data,
                        size_t size,
                        uint64_t alignment,
                        struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(size > 0);

   result = pvr_bo_alloc(device,
                         heap,
                         size,
                         alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, data, size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

VkResult pvr_gpu_upload_usc(struct pvr_device *device,
                            const void *code,
                            size_t code_size,
                            uint64_t code_alignment,
                            struct pvr_bo **const pvr_bo_out)
{
   struct pvr_bo *pvr_bo = NULL;
   VkResult result;

   assert(code_size > 0);

   /* The USC will prefetch the next instruction, so over allocate by 1
    * instruction to prevent reading off the end of a page into a potentially
    * unallocated page.
    */
   result = pvr_bo_alloc(device,
                         device->heaps.usc_heap,
                         code_size + ROGUE_MAX_INSTR_BYTES,
                         code_alignment,
                         PVR_BO_ALLOC_FLAG_CPU_MAPPED,
                         &pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   memcpy(pvr_bo->bo->map, code, code_size);
   pvr_bo_cpu_unmap(device, pvr_bo);

   *pvr_bo_out = pvr_bo;

   return VK_SUCCESS;
}

/**
 * \brief Upload PDS program data and code segments from host memory to device
 * memory.
 *
 * \param[in] device            Logical device pointer.
 * \param[in] data              Pointer to PDS data segment to upload.
 * \param[in] data_size_dwords  Size of PDS data segment in dwords.
 * \param[in] data_alignment    Required alignment of the PDS data segment in
 *                              bytes. Must be a power of two.
 * \param[in] code              Pointer to PDS code segment to upload.
 * \param[in] code_size_dwords  Size of PDS code segment in dwords.
 * \param[in] code_alignment    Required alignment of the PDS code segment in
 *                              bytes. Must be a power of two.
 * \param[in] min_alignment     Minimum alignment of the bo holding the PDS
 *                              program in bytes.
 * \param[out] pds_upload_out   On success will be initialized based on the
 *                              uploaded PDS program.
 * \return VK_SUCCESS on success, or error code otherwise.
 */
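/* A hypothetical data-only caller might look like the following; passing
 * NULL/0 for the code arguments is valid because the function only requires
 * that at least one of the two segments be provided:
 *
 *    struct pvr_pds_upload upload;
 *    VkResult result = pvr_gpu_upload_pds(device,
 *                                         data, data_size_dwords, 16,
 *                                         NULL, 0, 0,
 *                                         16, &upload);
 */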
VkResult pvr_gpu_upload_pds(struct pvr_device *device,
                            const uint32_t *data,
                            uint32_t data_size_dwords,
                            uint32_t data_alignment,
                            const uint32_t *code,
                            uint32_t code_size_dwords,
                            uint32_t code_alignment,
                            uint64_t min_alignment,
                            struct pvr_pds_upload *const pds_upload_out)
{
   /* All alignment and sizes below are in bytes. */
   const size_t data_size = data_size_dwords * sizeof(*data);
   const size_t code_size = code_size_dwords * sizeof(*code);
   const uint64_t data_aligned_size = ALIGN_POT(data_size, data_alignment);
   const uint64_t code_aligned_size = ALIGN_POT(code_size, code_alignment);
   const uint32_t code_offset = ALIGN_POT(data_aligned_size, code_alignment);
   const uint64_t bo_alignment = MAX2(min_alignment, data_alignment);
   const uint64_t bo_size = (!!code) ? (code_offset + code_aligned_size)
                                     : data_aligned_size;
   const uint64_t bo_flags = PVR_BO_ALLOC_FLAG_CPU_MAPPED |
                             PVR_BO_ALLOC_FLAG_ZERO_ON_ALLOC;
   VkResult result;

   assert(code || data);
   assert(!code || (code_size_dwords != 0 && code_alignment != 0));
   assert(!data || (data_size_dwords != 0 && data_alignment != 0));

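   /* When both segments are present, the single bo is laid out as
    *
    *    [ data segment | pad to code_alignment | code segment ]
    *
    * with the code segment starting code_offset bytes into the bo.
    */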
   result = pvr_bo_alloc(device,
                         device->heaps.pds_heap,
                         bo_size,
                         bo_alignment,
                         bo_flags,
                         &pds_upload_out->pvr_bo);
   if (result != VK_SUCCESS)
      return result;

   if (data) {
      memcpy(pds_upload_out->pvr_bo->bo->map, data, data_size);

      pds_upload_out->data_offset = pds_upload_out->pvr_bo->vma->dev_addr.addr -
                                    device->heaps.pds_heap->base_addr.addr;

      /* Store data size in dwords. */
      assert(data_aligned_size % 4 == 0);
      pds_upload_out->data_size = data_aligned_size / 4;
   } else {
      pds_upload_out->data_offset = 0;
      pds_upload_out->data_size = 0;
   }

   if (code) {
      memcpy((uint8_t *)pds_upload_out->pvr_bo->bo->map + code_offset,
             code,
             code_size);

      pds_upload_out->code_offset =
         (pds_upload_out->pvr_bo->vma->dev_addr.addr + code_offset) -
         device->heaps.pds_heap->base_addr.addr;

      /* Store code size in dwords. */
      assert(code_aligned_size % 4 == 0);
      pds_upload_out->code_size = code_aligned_size / 4;
   } else {
      pds_upload_out->code_offset = 0;
      pds_upload_out->code_size = 0;
   }

   pvr_bo_cpu_unmap(device, pds_upload_out->pvr_bo);

   return VK_SUCCESS;
}

static VkResult
pvr_framebuffer_create_ppp_state(struct pvr_device *device,
                                 struct pvr_framebuffer *framebuffer)
{
   const uint32_t cache_line_size =
      rogue_get_slc_cache_line_size(&device->pdevice->dev_info);
   uint32_t ppp_state[3];
   VkResult result;

   pvr_csb_pack (&ppp_state[0], TA_STATE_HEADER, header) {
      header.pres_terminate = true;
   }

   pvr_csb_pack (&ppp_state[1], TA_STATE_TERMINATE0, term0) {
      term0.clip_right =
         DIV_ROUND_UP(
            framebuffer->width,
            PVRX(TA_STATE_TERMINATE0_CLIP_RIGHT_BLOCK_SIZE_IN_PIXELS)) -
         1;
      term0.clip_bottom =
         DIV_ROUND_UP(
            framebuffer->height,
            PVRX(TA_STATE_TERMINATE0_CLIP_BOTTOM_BLOCK_SIZE_IN_PIXELS)) -
         1;
   }

   pvr_csb_pack (&ppp_state[2], TA_STATE_TERMINATE1, term1) {
      term1.render_target = 0;
      term1.clip_left = 0;
   }

   result = pvr_gpu_upload(device,
                           device->heaps.general_heap,
                           ppp_state,
                           sizeof(ppp_state),
                           cache_line_size,
                           &framebuffer->ppp_state_bo);
   if (result != VK_SUCCESS)
      return result;

   /* Calculate the size of PPP state in dwords. */
   framebuffer->ppp_state_size = sizeof(ppp_state) / sizeof(uint32_t);

   return VK_SUCCESS;
}

static bool pvr_render_targets_init(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   uint32_t i;

   for (i = 0; i < render_targets_count; i++) {
      if (pthread_mutex_init(&render_targets[i].mutex, NULL))
         goto err_mutex_destroy;
   }

   return true;

err_mutex_destroy:
   while (i--)
      pthread_mutex_destroy(&render_targets[i].mutex);

   return false;
}

static void pvr_render_targets_fini(struct pvr_render_target *render_targets,
                                    uint32_t render_targets_count)
{
   for (uint32_t i = 0; i < render_targets_count; i++) {
      if (render_targets[i].valid) {
         pvr_render_target_dataset_destroy(render_targets[i].rt_dataset);
         render_targets[i].valid = false;
      }

      pthread_mutex_destroy(&render_targets[i].mutex);
   }
}

VkResult pvr_CreateFramebuffer(VkDevice _device,
                               const VkFramebufferCreateInfo *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkFramebuffer *pFramebuffer)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_target *render_targets;
   struct pvr_framebuffer *framebuffer;
   struct pvr_image_view **attachments;
   uint32_t render_targets_count;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   render_targets_count =
      PVR_RENDER_TARGETS_PER_FRAMEBUFFER(&device->pdevice->dev_info);

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &framebuffer, __typeof__(*framebuffer), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &render_targets,
                     __typeof__(*render_targets),
                     render_targets_count);

   if (!vk_multialloc_zalloc2(&ma,
                              &device->vk.alloc,
                              pAllocator,
                              VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk,
                       &framebuffer->base,
                       VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   framebuffer->attachments = attachments;
   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < framebuffer->attachment_count; i++) {
      framebuffer->attachments[i] =
         pvr_image_view_from_handle(pCreateInfo->pAttachments[i]);
   }

   result = pvr_framebuffer_create_ppp_state(device, framebuffer);
   if (result != VK_SUCCESS)
      goto err_free_framebuffer;

   framebuffer->render_targets = render_targets;
   framebuffer->render_targets_count = render_targets_count;
   if (!pvr_render_targets_init(framebuffer->render_targets,
                                render_targets_count)) {
      result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto err_free_ppp_state_bo;
   }

   *pFramebuffer = pvr_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;

err_free_ppp_state_bo:
   pvr_bo_free(device, framebuffer->ppp_state_bo);

err_free_framebuffer:
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);

   return result;
}

void pvr_DestroyFramebuffer(VkDevice _device,
                            VkFramebuffer _fb,
                            const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_framebuffer, framebuffer, _fb);

   if (!framebuffer)
      return;

   pvr_render_targets_fini(framebuffer->render_targets,
                           framebuffer->render_targets_count);
   pvr_bo_free(device, framebuffer->ppp_state_bo);
   vk_object_base_finish(&framebuffer->base);
   vk_free2(&device->vk.alloc, pAllocator, framebuffer);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *    - Loader interface v0 is incompatible with later versions. We don't
    *      support it.
    *
    *    - In loader interface v1:
    *        - The first ICD entrypoint called by the loader is
    *          vk_icdGetInstanceProcAddr(). The ICD must statically expose
    *          this entrypoint.
    *        - The ICD must statically expose no other Vulkan symbol unless
    *          it is linked with -Bsymbolic.
    *        - Each dispatchable Vulkan handle created by the ICD must be
    *          a pointer to a struct whose first member is VK_LOADER_DATA.
    *          The ICD must initialize VK_LOADER_DATA.loadMagic to
    *          ICD_LOADER_MAGIC.
    *        - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *          vkDestroySurfaceKHR(). The ICD must be capable of working with
    *          such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *        - The first ICD entrypoint called by the loader is
    *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *          statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    *
    *    - Loader interface v4 differs from v3 in:
    *        - The ICD must implement vk_icdGetPhysicalDeviceProcAddr().
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}

static uint32_t
pvr_sampler_get_hw_filter_from_vk(const struct pvr_device_info *dev_info,
                                  VkFilter filter)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return PVRX(TEXSTATE_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return PVRX(TEXSTATE_FILTER_LINEAR);
   default:
      unreachable("Unknown filter type.");
   }
}

static uint32_t
pvr_sampler_get_hw_addr_mode_from_vk(VkSamplerAddressMode addr_mode)
{
   switch (addr_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_REPEAT);
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return PVRX(TEXSTATE_ADDRMODE_FLIP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_EDGE);
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return PVRX(TEXSTATE_ADDRMODE_FLIP_ONCE_THEN_CLAMP);
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return PVRX(TEXSTATE_ADDRMODE_CLAMP_TO_BORDER);
   default:
      unreachable("Invalid sampler address mode.");
   }
}

VkResult pvr_CreateSampler(VkDevice _device,
                           const VkSamplerCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *pAllocator,
                           VkSampler *pSampler)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_sampler *sampler;
   float lod_rounding_bias;
   VkFilter min_filter;
   VkFilter mag_filter;
   float min_lod;
   float max_lod;

   STATIC_ASSERT(sizeof(((union pvr_sampler_descriptor *)NULL)->data) ==
                 sizeof(((union pvr_sampler_descriptor *)NULL)->words));

   sampler = vk_object_alloc(&device->vk,
                             pAllocator,
                             sizeof(*sampler),
                             VK_OBJECT_TYPE_SAMPLER);
   if (!sampler)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   mag_filter = pCreateInfo->magFilter;
   min_filter = pCreateInfo->minFilter;

   if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025)) {
      /* The min/mag filters may need adjustment here: the GPU should decide
       * which of the two filters to use based on the clamped LOD value. LOD
       * <= 0 implies magnification, while LOD > 0 implies minification.
       *
       * As a workaround, we override magFilter with minFilter if we know that
       * the magnification filter will never be used due to clamping anyway
       * (i.e. minLod > 0). Conversely, we override minFilter with magFilter
       * if maxLod <= 0.
       */
      if (pCreateInfo->minLod > 0.0f) {
         /* The clamped LOD will always be positive => always minify. */
         mag_filter = pCreateInfo->minFilter;
      }

      if (pCreateInfo->maxLod <= 0.0f) {
         /* The clamped LOD will always be negative or zero => always
          * magnify.
          */
         min_filter = pCreateInfo->magFilter;
      }
   }
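   /* For example, a sampler created with minLod = 1.0 can never clamp its
    * LOD into the magnification range, so the override above simply makes
    * mag_filter match min_filter up front on the affected hardware.
    */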

   if (pCreateInfo->compareEnable) {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(pCreateInfo->compareOp);
   } else {
      sampler->descriptor.data.compare_op =
         (uint32_t)pvr_texstate_cmpmode(VK_COMPARE_OP_NEVER);
   }

   sampler->descriptor.data.word3 = 0;
   pvr_csb_pack (&sampler->descriptor.data.sampler_word,
                 TEXSTATE_SAMPLER,
                 word) {
      const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
      const float lod_clamp_max = (float)PVRX(TEXSTATE_CLAMP_MAX) /
                                  (1 << PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));
      const float max_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MAX_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));
      const float min_dadjust = ((float)(PVRX(TEXSTATE_DADJUST_MIN_UINT) -
                                         PVRX(TEXSTATE_DADJUST_ZERO_UINT))) /
                                (1 << PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      word.magfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, mag_filter);
      word.minfilter = pvr_sampler_get_hw_filter_from_vk(dev_info, min_filter);

      if (pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_LINEAR)
         word.mipfilter = true;

      word.addrmode_u =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeU);
      word.addrmode_v =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeV);
      word.addrmode_w =
         pvr_sampler_get_hw_addr_mode_from_vk(pCreateInfo->addressModeW);

      /* TODO: Figure out defines for these. */
      if (word.addrmode_u == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x40000000;

      if (word.addrmode_v == PVRX(TEXSTATE_ADDRMODE_FLIP))
         sampler->descriptor.data.word3 |= 0x20000000;

      /* The Vulkan 1.0.205 spec says:
       *
       *    The absolute value of mipLodBias must be less than or equal to
       *    VkPhysicalDeviceLimits::maxSamplerLodBias.
       */
      word.dadjust =
         PVRX(TEXSTATE_DADJUST_ZERO_UINT) +
         util_signed_fixed(
            CLAMP(pCreateInfo->mipLodBias, min_dadjust, max_dadjust),
            PVRX(TEXSTATE_DADJUST_FRACTIONAL_BITS));

      /* Anisotropy is not supported for now. */
      word.anisoctl = PVRX(TEXSTATE_ANISOCTL_DISABLED);

      if (PVR_HAS_QUIRK(&device->pdevice->dev_info, 51025) &&
          pCreateInfo->mipmapMode == VK_SAMPLER_MIPMAP_MODE_NEAREST) {
         /* When MIPMAP_MODE_NEAREST is enabled, the LOD level should be
          * selected by adding 0.5 and then truncating the input LOD value.
          * This hardware adds the 0.5 bias before clamping against
          * lodmin/lodmax, while Vulkan specifies the bias to be added after
          * clamping. We compensate for this difference by adding the 0.5
          * bias to the LOD bounds, too.
          */
         lod_rounding_bias = 0.5f;
      } else {
         lod_rounding_bias = 0.0f;
      }

      min_lod = pCreateInfo->minLod + lod_rounding_bias;
      word.minlod = util_unsigned_fixed(CLAMP(min_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      max_lod = pCreateInfo->maxLod + lod_rounding_bias;
      word.maxlod = util_unsigned_fixed(CLAMP(max_lod, 0.0f, lod_clamp_max),
                                        PVRX(TEXSTATE_CLAMP_FRACTIONAL_BITS));

      word.bordercolor_index = pCreateInfo->borderColor;

      if (pCreateInfo->unnormalizedCoordinates)
         word.non_normalized_coords = true;
   }

   *pSampler = pvr_sampler_to_handle(sampler);

   return VK_SUCCESS;
}

void pvr_DestroySampler(VkDevice _device,
                        VkSampler _sampler,
                        const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_object_free(&device->vk, pAllocator, sampler);
}

void pvr_GetBufferMemoryRequirements2(
   VkDevice _device,
   const VkBufferMemoryRequirementsInfo2 *pInfo,
   VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_buffer, buffer, pInfo->buffer);
   PVR_FROM_HANDLE(pvr_device, device, _device);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for buffers.
    */
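   /* E.g. a device exposing two memory types reports memoryTypeBits = 0b11. */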
   pMemoryRequirements->memoryRequirements.memoryTypeBits =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   pMemoryRequirements->memoryRequirements.alignment = buffer->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN_POT(buffer->vk.size, buffer->alignment);
}

void pvr_GetImageMemoryRequirements2(VkDevice _device,
                                     const VkImageMemoryRequirementsInfo2 *pInfo,
                                     VkMemoryRequirements2 *pMemoryRequirements)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_image, image, pInfo->image);

   /* The Vulkan 1.0.166 spec says:
    *
    *    memoryTypeBits is a bitmask and contains one bit set for every
    *    supported memory type for the resource. Bit 'i' is set if and only
    *    if the memory type 'i' in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported for the resource.
    *
    * All types are currently supported for images.
    */
   const uint32_t memory_types =
      (1ul << device->pdevice->memory.memoryTypeCount) - 1;

   /* TODO: The returned size is aligned here in case of arrays/CEM (as is
    * done in GetImageMemoryRequirements()), but this should be known at image
    * creation time (pCreateInfo->arrayLayers > 1). This is confirmed in
    * ImageCreate()/ImageGetMipMapOffsetInBytes() where it aligns the size to
    * 4096 if pCreateInfo->arrayLayers > 1. So is the alignment here actually
    * necessary? If not, what should it be when pCreateInfo->arrayLayers == 1?
    *
    * Note: Presumably the 4096 alignment requirement comes from the Vulkan
    * driver setting RGX_CR_TPU_TAG_CEM_4K_FACE_PACKING_EN when setting up
    * render and compute jobs.
    */
   pMemoryRequirements->memoryRequirements.alignment = image->alignment;
   pMemoryRequirements->memoryRequirements.size =
      ALIGN(image->size, image->alignment);
   pMemoryRequirements->memoryRequirements.memoryTypeBits = memory_types;
}