/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <vulkan/vulkan.h>

#include "pvr_blit.h"
#include "pvr_clear.h"
#include "pvr_csb.h"
#include "pvr_formats.h"
#include "pvr_job_transfer.h"
#include "pvr_private.h"
#include "usc/programs/pvr_shader_factory.h"
#include "usc/programs/pvr_static_shaders.h"
#include "pvr_types.h"
#include "util/bitscan.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_command_buffer.h"
#include "vk_command_pool.h"
#include "vk_format.h"
#include "vk_log.h"

/* TODO: Investigate where this limit comes from. */
#define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U

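/* Allocate a zero-initialized transfer command from the command buffer's pool
 * and set up defaults for the first source: point filtering, blend resolve
 * and clamp-to-edge addressing. Returns NULL (and records
 * VK_ERROR_OUT_OF_HOST_MEMORY on the command buffer) on allocation failure.
 */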
static struct pvr_transfer_cmd *
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
{
   struct pvr_transfer_cmd *transfer_cmd;

   transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
                            sizeof(*transfer_cmd),
                            8U,
                            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!transfer_cmd) {
      vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
      return NULL;
   }

   /* transfer_cmd->mapping_count is already set to zero. */
   transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
   transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
   transfer_cmd->sources[0].addr_mode = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
   transfer_cmd->cmd_buffer = cmd_buffer;

   return transfer_cmd;
}

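/* Describe a linear buffer as a 2D transfer surface and initialize the
 * matching transfer rectangle. For block-compressed formats the surface
 * dimensions and rectangle are converted from texels to block units.
 */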
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
                                     VkRect2D *rect,
                                     pvr_dev_addr_t dev_addr,
                                     VkDeviceSize offset,
                                     VkFormat vk_format,
                                     VkFormat image_format,
                                     uint32_t width,
                                     uint32_t height,
                                     uint32_t stride)
{
   enum pipe_format pformat = vk_format_to_pipe_format(image_format);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
   surface->width = width;
   surface->height = height;
   surface->stride = stride;
   surface->vk_format = vk_format;
   surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
   surface->sample_count = 1;

   /* Initialize the rectangle extent. rect->offset should also be zero; we
    * don't set it explicitly since the offset is already folded into the
    * device address above and transfer_cmd is zero-allocated.
    */
   rect->extent.width = width;
   rect->extent.height = height;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);

      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));

      rect->offset.x /= block_width;
      rect->offset.y /= block_height;
      rect->extent.width =
         MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
      rect->extent.height =
         MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
   }
}

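/* Map a format onto a bit-compatible UINT format of the same block size so
 * copies are raw bit-for-bit moves, e.g. a 4-byte block such as
 * VK_FORMAT_R8G8B8A8_UNORM is copied as VK_FORMAT_R32_UINT.
 */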
VkFormat pvr_get_raw_copy_format(VkFormat format)
{
   switch (vk_format_get_blocksize(format)) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R8G8_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unhandled copy block size.");
   }
}

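/* Fill in a transfer surface and rectangle for one subresource of an image.
 * For 3D twiddled images the depth slice is selected via z_position;
 * otherwise it is folded into the device address using the subresource's
 * depth pitch. When copying from a block-compressed image to an uncompressed
 * one, dimensions are converted to block units.
 */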
static void pvr_setup_transfer_surface(struct pvr_device *device,
                                       struct pvr_transfer_cmd_surface *surface,
                                       VkRect2D *rect,
                                       const struct pvr_image *image,
                                       uint32_t array_layer,
                                       uint32_t mip_level,
                                       const VkOffset3D *offset,
                                       const VkExtent3D *extent,
                                       float fdepth,
                                       VkFormat format,
                                       VkImageAspectFlags aspect_mask)
{
   const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
   const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
   enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format);
   enum pipe_format pformat = vk_format_to_pipe_format(format);
   const VkImageSubresource sub_resource = {
      .aspectMask = aspect_mask,
      .mipLevel = mip_level,
      .arrayLayer = array_layer,
   };
   VkSubresourceLayout info;
   uint32_t depth;

   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
   else
      depth = 1U;

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
   surface->width = width;
   surface->height = height;
   surface->depth = depth;

   assert(info.rowPitch % vk_format_get_blocksize(format) == 0);
   surface->stride = info.rowPitch / vk_format_get_blocksize(format);

   surface->vk_format = format;
   surface->mem_layout = image->memlayout;
   surface->sample_count = image->vk.samples;

   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      surface->z_position = fdepth;
   else
      surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);

   rect->offset.x = offset->x;
   rect->offset.y = offset->y;
   rect->extent.width = extent->width;
   rect->extent.height = extent->height;

   if (util_format_is_compressed(image_pformat) &&
       !util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(image_pformat);
      uint32_t block_height = util_format_get_blockheight(image_pformat);

      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));

      rect->offset.x /= block_width;
      rect->offset.y /= block_height;
      rect->extent.width =
         MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
      rect->extent.height =
         MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
   }
}

void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer,
                       const VkBlitImageInfo2 *pBlitImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
   struct pvr_device *device = cmd_buffer->device;
   enum pvr_filter filter = PVR_FILTER_DONTCARE;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
      filter = PVR_FILTER_LINEAR;

   for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];

      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);
      const bool inverted_dst_z =
         (region->dstOffsets[1].z < region->dstOffsets[0].z);
      const bool inverted_src_z =
         (region->srcOffsets[1].z < region->srcOffsets[0].z);
      const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
                                                : region->srcOffsets[0].z;
      const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
                                                : region->srcOffsets[1].z;
      const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
                                                : region->dstOffsets[0].z;
      const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
                                                : region->dstOffsets[1].z;

      const uint32_t src_width =
         region->srcOffsets[1].x - region->srcOffsets[0].x;
      const uint32_t src_height =
         region->srcOffsets[1].y - region->srcOffsets[0].y;
      uint32_t dst_width;
      uint32_t dst_height;

      float initial_depth_offset;
      VkExtent3D src_extent;
      VkExtent3D dst_extent;
      VkOffset3D dst_offset = region->dstOffsets[0];
      float z_slice_stride;
      bool flip_x;
      bool flip_y;

      if (region->dstOffsets[1].x > region->dstOffsets[0].x) {
         dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x;
         flip_x = false;
      } else {
         dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x;
         flip_x = true;
         dst_offset.x = region->dstOffsets[1].x;
      }

      if (region->dstOffsets[1].y > region->dstOffsets[0].y) {
         dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y;
         flip_y = false;
      } else {
         dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y;
         flip_y = true;
         dst_offset.y = region->dstOffsets[1].y;
      }

      /* If any of the region extents is zero, reject the blit for this
       * region and continue with the next one.
       */
      if (!src_width || !src_height || !dst_width || !dst_height ||
          !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
         mesa_loge("BlitImage: Region %u has an area of zero", i);
         continue;
      }

      src_extent = (VkExtent3D){
         .width = src_width,
         .height = src_height,
         .depth = 0U,
      };

      dst_extent = (VkExtent3D){
         .width = dst_width,
         .height = dst_height,
         .depth = 0U,
      };

      /* The z_position of a transfer surface is intended to be in the range
       * 0.0f <= z_position <= depth. It is used as a texture coordinate in
       * the source surface when linear filtering is enabled, so the
       * fractional part needs to represent the exact midpoint of a z slice
       * range in the source texture as it maps to each destination slice.
       *
       * For destination surfaces the fractional part is discarded, so we can
       * safely pass the slice index.
       */

      /* Calculate the ratio of z slices in our source region to those in our
       * destination region, i.e. the number of source z slices to step over
       * for each destination slice.
       *
       * If our destination region is inverted, we iterate backwards.
       */
      z_slice_stride =
         (inverted_dst_z ? -1.0f : 1.0f) *
         ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));

      /* Offset the initial depth offset by half of the z slice stride, into
       * the blit region's z range.
       */
      initial_depth_offset =
         (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);

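      /* Worked example (illustrative): blitting source slices [0, 4) onto
       * destination slices [0, 2) gives z_slice_stride = 4 / 2 = 2.0 and
       * initial_depth_offset = 0 + 0.5 * 2.0 = 1.0, so the first destination
       * slice samples the midpoint between source slices 0 and 1, and the
       * second samples between slices 2 and 3.
       */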
      for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
         struct pvr_transfer_cmd_surface src_surface = { 0 };
         struct pvr_transfer_cmd_surface dst_surface = { 0 };
         VkRect2D src_rect;
         VkRect2D dst_rect;

         /* Get the subresource info for the src and dst images; this is
          * required when incrementing the address of the depth slice used by
          * the transfer surface.
          */
         VkSubresourceLayout src_info, dst_info;
         const VkImageSubresource src_sub_resource = {
            .aspectMask = region->srcSubresource.aspectMask,
            .mipLevel = region->srcSubresource.mipLevel,
            .arrayLayer = region->srcSubresource.baseArrayLayer + j,
         };
         const VkImageSubresource dst_sub_resource = {
            .aspectMask = region->dstSubresource.aspectMask,
            .mipLevel = region->dstSubresource.mipLevel,
            .arrayLayer = region->dstSubresource.baseArrayLayer + j,
         };

         pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
         pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);

         /* Set up the transfer surfaces once per image layer, which saves us
          * from repeating subresource queries by manually incrementing the
          * depth slices.
          */
         pvr_setup_transfer_surface(device,
                                    &src_surface,
                                    &src_rect,
                                    src,
                                    region->srcSubresource.baseArrayLayer + j,
                                    region->srcSubresource.mipLevel,
                                    &region->srcOffsets[0],
                                    &src_extent,
                                    initial_depth_offset,
                                    src->vk.format,
                                    region->srcSubresource.aspectMask);

         pvr_setup_transfer_surface(device,
                                    &dst_surface,
                                    &dst_rect,
                                    dst,
                                    region->dstSubresource.baseArrayLayer + j,
                                    region->dstSubresource.mipLevel,
                                    &dst_offset,
                                    &dst_extent,
                                    min_dst_z,
                                    dst->vk.format,
                                    region->dstSubresource.aspectMask);

         for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            /* TODO: See if we can allocate all the transfer cmds in one go. */
            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return;

            transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
            transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
            transfer_cmd->sources[0].mappings[0].flip_x = flip_x;
            transfer_cmd->sources[0].mappings[0].flip_y = flip_y;
            transfer_cmd->sources[0].mapping_count++;

            transfer_cmd->sources[0].surface = src_surface;
            transfer_cmd->sources[0].filter = filter;
            transfer_cmd->source_count = 1;

            transfer_cmd->dst = dst_surface;
            transfer_cmd->scissor = dst_rect;

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return;
            }

            if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
               src_surface.z_position += z_slice_stride;
            } else {
               src_surface.dev_addr.addr +=
                  src_info.depthPitch * ((uint32_t)z_slice_stride);
            }

            if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
               dst_surface.z_position += 1.0f;
            else
               dst_surface.dev_addr.addr += dst_info.depthPitch;
         }
      }
   }
}

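/* SNORM copies go through the equivalent SINT format, presumably to keep the
 * copy bit-exact: 8-bit SNORM has two encodings of -1.0 (0x80 and 0x81),
 * which a normalizing format could collapse, while the integer format
 * preserves the raw bits.
 */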
static VkFormat pvr_get_copy_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_SINT;
   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_SINT;
   case VK_FORMAT_R8G8B8_SNORM:
      return VK_FORMAT_R8G8B8_SINT;
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_SINT;
   case VK_FORMAT_B8G8R8A8_SNORM:
      return VK_FORMAT_B8G8R8A8_SINT;
   default:
      return format;
   }
}

static void
pvr_setup_surface_for_image(struct pvr_device *device,
                            struct pvr_transfer_cmd_surface *surface,
                            VkRect2D *rect,
                            const struct pvr_image *image,
                            uint32_t array_layer,
                            uint32_t array_offset,
                            uint32_t mip_level,
                            const VkOffset3D *offset,
                            const VkExtent3D *extent,
                            uint32_t depth,
                            VkFormat format,
                            const VkImageAspectFlags aspect_mask)
{
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      pvr_setup_transfer_surface(device,
                                 surface,
                                 rect,
                                 image,
                                 array_layer + array_offset,
                                 mip_level,
                                 offset,
                                 extent,
                                 0.0f,
                                 format,
                                 aspect_mask);
   } else {
      pvr_setup_transfer_surface(device,
                                 surface,
                                 rect,
                                 image,
                                 array_layer,
                                 mip_level,
                                 offset,
                                 extent,
                                 (float)depth,
                                 format,
                                 aspect_mask);
   }
}

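/* Emit one transfer command per array layer or depth slice for an image copy
 * or resolve. Handles depth/stencil merging for D24S8, extent adjustment
 * between compressed and uncompressed formats, and raw-format selection when
 * no resolve is needed.
 */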
static VkResult
pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                 enum pvr_resolve_op resolve_op,
                                 const struct pvr_image *src,
                                 const struct pvr_image *dst,
                                 const VkImageCopy2 *region)
{
   enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format);
   enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format);
   bool src_block_compressed = util_format_is_compressed(src_pformat);
   bool dst_block_compressed = util_format_is_compressed(dst_pformat);
   VkExtent3D src_extent;
   VkExtent3D dst_extent;
   VkFormat dst_format;
   VkFormat src_format;
   uint32_t dst_layers;
   uint32_t src_layers;
   uint32_t max_slices;
   uint32_t flags = 0U;

   if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
       region->srcSubresource.aspectMask !=
          (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
      /* Takes the stencil of the source and the depth of the destination and
       * combines the two interleaved.
       */
      flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

      if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         /* Takes the depth of the source and the stencil of the destination
          * and combines the two interleaved.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }
   }

   src_extent = region->extent;
   dst_extent = region->extent;

   if (src_block_compressed && !dst_block_compressed) {
      uint32_t block_width = util_format_get_blockwidth(src_pformat);
      uint32_t block_height = util_format_get_blockheight(src_pformat);

      dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width));
      dst_extent.height =
         MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height));
   } else if (!src_block_compressed && dst_block_compressed) {
      uint32_t block_width = util_format_get_blockwidth(dst_pformat);
      uint32_t block_height = util_format_get_blockheight(dst_pformat);

      dst_extent.width = MAX2(1U, src_extent.width * block_width);
      dst_extent.height = MAX2(1U, src_extent.height * block_height);
   }

   if (src->vk.samples > dst->vk.samples) {
      /* The resolve op needs to know the actual format. */
      dst_format = dst->vk.format;
   } else {
      /* We don't care what format dst is as it's guaranteed to be size
       * compatible with src.
       */
      dst_format = pvr_get_raw_copy_format(src->vk.format);
   }
   src_format = dst_format;

   src_layers =
      vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
   dst_layers =
      vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);

   /* srcSubresource.layerCount must match the layerCount of dstSubresource
    * in copies not involving 3D images. In copies involving 3D images, the
    * layer count comes from the 2D image, if there is one, and the number of
    * slices comes from the copy's depth extent.
    */
   max_slices = MAX3(src_layers, dst_layers, region->extent.depth);

   for (uint32_t i = 0U; i < max_slices; i++) {
      struct pvr_transfer_cmd *transfer_cmd;
      VkResult result;

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      transfer_cmd->flags |= flags;
      transfer_cmd->sources[0].resolve_op = resolve_op;

      pvr_setup_surface_for_image(
         cmd_buffer->device,
         &transfer_cmd->sources[0].surface,
         &transfer_cmd->sources[0].mappings[0U].src_rect,
         src,
         region->srcSubresource.baseArrayLayer,
         i,
         region->srcSubresource.mipLevel,
         &region->srcOffset,
         &src_extent,
         region->srcOffset.z + i,
         src_format,
         region->srcSubresource.aspectMask);

      pvr_setup_surface_for_image(cmd_buffer->device,
                                  &transfer_cmd->dst,
                                  &transfer_cmd->scissor,
                                  dst,
                                  region->dstSubresource.baseArrayLayer,
                                  i,
                                  region->dstSubresource.mipLevel,
                                  &region->dstOffset,
                                  &dst_extent,
                                  region->dstOffset.z + i,
                                  dst_format,
                                  region->dstSubresource.aspectMask);

      transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
      transfer_cmd->sources[0].mapping_count++;
      transfer_cmd->source_count = 1;

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }
   }

   return VK_SUCCESS;
}

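/* Copy or resolve a color image region, choosing the resolve operation based
 * on the source format: multisampled integer formats resolve by taking
 * sample 0, everything else resolves by blending.
 */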
VkResult
pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
                                       const struct pvr_image *src,
                                       const struct pvr_image *dst,
                                       const VkImageCopy2 *region)
{
   enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;

   if (src->vk.samples > 1U && dst->vk.samples < 2U) {
      /* Integer resolve picks a single sample. */
      if (vk_format_is_int(src->vk.format))
         resolve_op = PVR_RESOLVE_SAMPLE0;
   }

   return pvr_copy_or_resolve_image_region(cmd_buffer,
                                           resolve_op,
                                           src,
                                           dst,
                                           region);
}

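/* Check whether two copy regions are identical apart from their aspect
 * masks, with one covering depth and the other stencil, so that they can be
 * combined into a single depth/stencil blit.
 */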
static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
                                     const VkImageCopy2 *pRegionB)
{
   assert(pRegionA->srcSubresource.aspectMask != 0U);
   assert(pRegionB->srcSubresource.aspectMask != 0U);

   if (!((pRegionA->srcSubresource.aspectMask ^
          pRegionB->srcSubresource.aspectMask) &
         (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      return false;
   }

   /* Assert if the aspectMask mismatches between src and dst, given it's a
    * depth and stencil image so not multi-planar and from the Vulkan 1.0.223
    * spec:
    *
    *    If neither srcImage nor dstImage has a multi-planar image format
    *    then for each element of pRegions, srcSubresource.aspectMask and
    *    dstSubresource.aspectMask must match.
    */
   assert(pRegionA->srcSubresource.aspectMask ==
          pRegionA->dstSubresource.aspectMask);
   assert(pRegionB->srcSubresource.aspectMask ==
          pRegionB->dstSubresource.aspectMask);

   if (!(pRegionA->srcSubresource.mipLevel ==
            pRegionB->srcSubresource.mipLevel &&
         pRegionA->srcSubresource.baseArrayLayer ==
            pRegionB->srcSubresource.baseArrayLayer &&
         pRegionA->srcSubresource.layerCount ==
            pRegionB->srcSubresource.layerCount)) {
      return false;
   }

   if (!(pRegionA->dstSubresource.mipLevel ==
            pRegionB->dstSubresource.mipLevel &&
         pRegionA->dstSubresource.baseArrayLayer ==
            pRegionB->dstSubresource.baseArrayLayer &&
         pRegionA->dstSubresource.layerCount ==
            pRegionB->dstSubresource.layerCount)) {
      return false;
   }

   if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
         pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
         pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
      return false;
   }

   if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
         pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
         pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
      return false;
   }

   if (!(pRegionA->extent.width == pRegionB->extent.width &&
         pRegionA->extent.height == pRegionB->extent.height &&
         pRegionA->extent.depth == pRegionB->extent.depth)) {
      return false;
   }

   return true;
}

void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer,
                       const VkCopyImageInfo2 *pCopyImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);

   const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
                             dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
      VkResult result;

      /* If an application has split a copy between D24S8 images into two
       * separate copy regions (one for the depth aspect and one for the
       * stencil aspect), attempt to merge the two regions back into one
       * blit.
       *
       * The regions can only be merged if they are identical apart from the
       * aspectMask, one of which has to be depth and the other stencil.
       *
       * Only consecutive regions are considered for merging; merging
       * non-consecutive regions is not attempted.
       */
      if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) {
         const bool ret =
            pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i],
                                     &pCopyImageInfo->pRegions[i + 1]);
         if (ret) {
            VkImageCopy2 region = pCopyImageInfo->pRegions[i];

            region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;
            region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;

            result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                            src,
                                                            dst,
                                                            &region);
            if (result != VK_SUCCESS)
               return;

            /* Skip the next region as it has been processed with the last
             * region.
             */
            i++;

            continue;
         }
      }

      result =
         pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                src,
                                                dst,
                                                &pCopyImageInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}

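/* Copy a buffer region into an image using explicit source and destination
 * formats. bufferRowLength/bufferImageHeight of zero mean the buffer is
 * tightly packed according to imageExtent, per the Vulkan spec. One transfer
 * command is emitted per depth slice and array layer.
 */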
VkResult
pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const struct pvr_image *const image,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format,
                                       const uint32_t flags)
{
   enum pipe_format pformat = vk_format_to_pipe_format(dst_format);
   uint32_t row_length_in_texels;
   uint32_t buffer_slice_size;
   uint32_t buffer_layer_size;
   uint32_t height_in_blks;
   uint32_t row_length;

   if (region->bufferRowLength == 0)
      row_length_in_texels = region->imageExtent.width;
   else
      row_length_in_texels = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height_in_blks = region->imageExtent.height;
   else
      height_in_blks = region->bufferImageHeight;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);
      uint32_t block_size = util_format_get_blocksize(pformat);

      height_in_blks = DIV_ROUND_UP(height_in_blks, block_height);
      row_length_in_texels =
         DIV_ROUND_UP(row_length_in_texels, block_width) * block_size;
   }

   row_length = row_length_in_texels * vk_format_get_blocksize(src_format);

   buffer_slice_size = height_in_blks * row_length;
   buffer_layer_size = buffer_slice_size * region->imageExtent.depth;

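   /* Example (illustrative): a tightly packed 256x256x1 upload to an
    * R8G8B8A8 image has row_length = 256 * 4 = 1024 bytes and
    * buffer_slice_size = 256 * 1024 bytes; each array layer then starts
    * buffer_layer_size bytes after the previous one.
    */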
   for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
      const uint32_t depth = i + (uint32_t)region->imageOffset.z;

      for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
         const VkDeviceSize buffer_offset = region->bufferOffset +
                                            (j * buffer_layer_size) +
                                            (i * buffer_slice_size);
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return VK_ERROR_OUT_OF_HOST_MEMORY;

         transfer_cmd->flags = flags;

         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            buffer_dev_addr,
            buffer_offset,
            src_format,
            image->vk.format,
            region->imageExtent.width,
            region->imageExtent.height,
            row_length_in_texels);

         transfer_cmd->sources[0].surface.depth = 1;
         transfer_cmd->source_count = 1;

         pvr_setup_transfer_surface(cmd_buffer->device,
                                    &transfer_cmd->dst,
                                    &transfer_cmd->scissor,
                                    image,
                                    region->imageSubresource.baseArrayLayer + j,
                                    region->imageSubresource.mipLevel,
                                    &region->imageOffset,
                                    &region->imageExtent,
                                    depth,
                                    dst_format,
                                    region->imageSubresource.aspectMask);

         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
         transfer_cmd->sources[0].mapping_count++;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }
      }
   }

   return VK_SUCCESS;
}

VkResult
pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer,
                                const pvr_dev_addr_t buffer_dev_addr,
                                const struct pvr_image *const image,
                                const VkBufferImageCopy2 *const region)
{
   const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
   VkFormat src_format;
   VkFormat dst_format;
   uint32_t flags = 0;

   if (vk_format_has_depth(image->vk.format) &&
       vk_format_has_stencil(image->vk.format)) {
      flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

      if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
         src_format = vk_format_stencil_only(image->vk.format);
      } else {
         src_format = vk_format_depth_only(image->vk.format);
         flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      dst_format = image->vk.format;
   } else {
      src_format = pvr_get_raw_copy_format(image->vk.format);
      dst_format = src_format;
   }

   return pvr_copy_buffer_to_image_region_format(cmd_buffer,
                                                 buffer_dev_addr,
                                                 image,
                                                 region,
                                                 src_format,
                                                 dst_format,
                                                 flags);
}

void pvr_CmdCopyBufferToImage2(
   VkCommandBuffer commandBuffer,
   const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
      const VkResult result =
         pvr_copy_buffer_to_image_region(cmd_buffer,
                                         src->dev_addr,
                                         dst,
                                         &pCopyBufferToImageInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}

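/* Copy an image region into a buffer using explicit source and destination
 * formats. The buffer is described as a linear destination surface; the
 * image side is walked one array layer and depth slice at a time, advancing
 * the buffer address by one slice worth of bytes per transfer command.
 */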
VkResult
pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const struct pvr_image *const image,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format)
{
   enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format);
   struct pvr_transfer_cmd_surface dst_surface = { 0 };
   VkImageSubresource sub_resource;
   uint32_t buffer_image_height;
   uint32_t buffer_row_length;
   uint32_t buffer_slice_size;
   uint32_t max_array_layers;
   VkRect2D dst_rect = { 0 };
   uint32_t max_depth_slice;
   VkSubresourceLayout info;

   /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to a buffer. */
   assert(image->vk.samples == 1);

   if (region->bufferRowLength == 0)
      buffer_row_length = region->imageExtent.width;
   else
      buffer_row_length = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buffer_image_height = region->imageExtent.height;
   else
      buffer_image_height = region->bufferImageHeight;

   max_array_layers =
      region->imageSubresource.baseArrayLayer +
      vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);

   buffer_slice_size = buffer_image_height * buffer_row_length *
                       vk_format_get_blocksize(dst_format);

   max_depth_slice = region->imageExtent.depth + region->imageOffset.z;

   pvr_setup_buffer_surface(&dst_surface,
                            &dst_rect,
                            buffer_dev_addr,
                            region->bufferOffset,
                            dst_format,
                            image->vk.format,
                            buffer_row_length,
                            buffer_image_height,
                            buffer_row_length);

   dst_rect.extent.width = region->imageExtent.width;
   dst_rect.extent.height = region->imageExtent.height;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);

      dst_rect.extent.width =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width));
      dst_rect.extent.height =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height));
   }

   sub_resource = (VkImageSubresource){
      .aspectMask = region->imageSubresource.aspectMask,
      .mipLevel = region->imageSubresource.mipLevel,
      .arrayLayer = region->imageSubresource.baseArrayLayer,
   };

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   for (uint32_t i = region->imageSubresource.baseArrayLayer;
        i < max_array_layers;
        i++) {
      struct pvr_transfer_cmd_surface src_surface = { 0 };
      VkRect2D src_rect = { 0 };

      /* Note: Set the depth to the initial depth offset; the memory address
       * (or the z_position) for the depth slice will be incremented manually
       * in the loop below.
       */
      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &src_surface,
                                 &src_rect,
                                 image,
                                 i,
                                 region->imageSubresource.mipLevel,
                                 &region->imageOffset,
                                 &region->imageExtent,
                                 region->imageOffset.z,
                                 src_format,
                                 region->imageSubresource.aspectMask);

      for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         /* TODO: See if we can allocate all the transfer cmds in one go. */
         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY);

         transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
         transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
         transfer_cmd->sources[0].mapping_count++;

         transfer_cmd->sources[0].surface = src_surface;
         transfer_cmd->source_count = 1;

         transfer_cmd->dst = dst_surface;
         transfer_cmd->scissor = dst_rect;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }

         dst_surface.dev_addr.addr += buffer_slice_size;

         if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
            src_surface.z_position += 1.0f;
         else
            src_surface.dev_addr.addr += info.depthPitch;
      }
   }

   return VK_SUCCESS;
}

VkResult
pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer,
                                const struct pvr_image *const image,
                                const pvr_dev_addr_t buffer_dev_addr,
                                const VkBufferImageCopy2 *const region)
{
   const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;

   VkFormat src_format = pvr_get_copy_format(image->vk.format);
   VkFormat dst_format;

   /* Color and depth aspect copies can be done using an appropriate raw
    * format.
    */
   if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
      src_format = pvr_get_raw_copy_format(src_format);
      dst_format = src_format;
   } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* From the Vulkan spec:
       *
       *    Data copied to or from the stencil aspect of any depth/stencil
       *    format is tightly packed with one VK_FORMAT_S8_UINT value per
       *    texel.
       */
      dst_format = VK_FORMAT_S8_UINT;
   } else {
      /* YUV planes require specific formats. */
      dst_format = src_format;
   }

   return pvr_copy_image_to_buffer_region_format(cmd_buffer,
                                                 image,
                                                 buffer_dev_addr,
                                                 region,
                                                 src_format,
                                                 dst_format);
}

void pvr_CmdCopyImageToBuffer2(
   VkCommandBuffer commandBuffer,
   const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
      const VkBufferImageCopy2 *region = &pCopyImageToBufferInfo->pRegions[i];

      const VkResult result = pvr_copy_image_to_buffer_region(cmd_buffer,
                                                              src,
                                                              dst->dev_addr,
                                                              region);
      if (result != VK_SUCCESS)
         return;
   }
}

static void pvr_calc_mip_level_extents(const struct pvr_image *image,
                                       uint16_t mip_level,
                                       VkExtent3D *extent_out)
{
   /* 3D textures are clamped to 4x4x4. */
   const uint32_t clamp = (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4 : 1;
   const VkExtent3D *extent = &image->vk.extent;

   extent_out->width = MAX2(extent->width >> mip_level, clamp);
   extent_out->height = MAX2(extent->height >> mip_level, clamp);
   extent_out->depth = MAX2(extent->depth >> mip_level, clamp);
}

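/* Clear every requested layer, mip level and depth slice of an image with a
 * single color value, one transfer fill command per slice.
 */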
static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
                                      const struct pvr_image *image,
                                      const VkClearColorValue *pColor,
                                      const VkImageSubresourceRange *psRange,
                                      uint32_t flags)
{
   const uint32_t layer_count =
      vk_image_subresource_layer_count(&image->vk, psRange);
   const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
   VkFormat format = image->vk.format;
   const VkOffset3D offset = { 0 };
   VkExtent3D mip_extent;

   assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);

   for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
      const uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, psRange);
      const uint32_t max_level = psRange->baseMipLevel + level_count;

      assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);

      for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
         pvr_calc_mip_level_extents(image, level, &mip_extent);

         for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return VK_ERROR_OUT_OF_HOST_MEMORY;

            transfer_cmd->flags |= flags;
            transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

            for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
               transfer_cmd->clear_color[i].ui = pColor->uint32[i];

            pvr_setup_transfer_surface(cmd_buffer->device,
                                       &transfer_cmd->dst,
                                       &transfer_cmd->scissor,
                                       image,
                                       layer,
                                       level,
                                       &offset,
                                       &mip_extent,
                                       depth,
                                       format,
                                       psRange->aspectMask);

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return result;
            }
         }
      }
   }

   return VK_SUCCESS;
}

void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
                            VkImage _image,
                            VkImageLayout imageLayout,
                            const VkClearColorValue *pColor,
                            uint32_t rangeCount,
                            const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkResult result =
         pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
      if (result != VK_SUCCESS)
         return;
   }
}

void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                                   VkImage _image,
                                   VkImageLayout imageLayout,
                                   const VkClearDepthStencilValue *pDepthStencil,
                                   uint32_t rangeCount,
                                   const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
                                           VK_IMAGE_ASPECT_STENCIL_BIT;
      VkClearColorValue clear_ds = { 0 };
      uint32_t flags = 0U;
      VkResult result;

      if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pRanges[i].aspectMask != ds_aspect) {
         /* A depth or stencil blit to a packed depth/stencil format requires
          * a merge operation.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

         if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
            flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      clear_ds.float32[0] = pDepthStencil->depth;
      clear_ds.uint32[1] = pDepthStencil->stencil;

      result =
         pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
      if (result != VK_SUCCESS)
         return;
   }
}

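/* Copy or fill a range of buffer memory using 2D transfer rectangles. The
 * texel format is picked from the address alignment and the remaining size
 * (128-bit chunks when both addresses are 16-byte aligned, else 32-bit, else
 * single bytes), and the range is consumed in max-width rects with a final
 * single-height rect for the remainder.
 */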
static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
                                           pvr_dev_addr_t src_addr,
                                           VkDeviceSize src_offset,
                                           pvr_dev_addr_t dst_addr,
                                           VkDeviceSize dst_offset,
                                           VkDeviceSize size,
                                           uint32_t fill_data,
                                           bool is_fill)
{
   VkDeviceSize offset = 0;

   while (offset < size) {
      const VkDeviceSize remaining_size = size - offset;
      struct pvr_transfer_cmd *transfer_cmd;
      uint32_t src_align = (src_addr.addr + offset + src_offset) & 0xF;
      uint32_t dst_align = (dst_addr.addr + offset + dst_offset) & 0xF;
      uint32_t texel_width;
      VkDeviceSize texels;
      VkFormat vk_format;
      VkResult result;
      uint32_t height;
      uint32_t width;

      if (is_fill) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else if (remaining_size >= 16U && (src_align % 16U) == 0 &&
                 (dst_align % 16U) == 0) {
         /* The 128bpp format can only be used when both addresses are
          * 16-byte aligned.
          */
         vk_format = VK_FORMAT_R32G32B32A32_UINT;
         texel_width = 16U;
      } else if (remaining_size >= 4U) {
         vk_format = VK_FORMAT_R32_UINT;
         texel_width = 4U;
      } else {
         vk_format = VK_FORMAT_R8_UINT;
         texel_width = 1U;
      }

      texels = remaining_size / texel_width;

      /* Try to do max-width rects, fall back to a 1-height rect for the
       * remainder.
       */
      if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
         width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
         height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
      } else {
         width = texels;
         height = 1;
      }
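      /* Example (illustrative): a 10000-byte copy between 16-byte-aligned
       * addresses uses VK_FORMAT_R32G32B32A32_UINT (16-byte texels), giving
       * 625 texels, which fits in a single 625x1 rect; a copy of
       * 2048 * 16 * 3 bytes would instead issue one 2048x3 rect.
       */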

      transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
      if (!transfer_cmd)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      if (!is_fill) {
         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            src_addr,
            offset + src_offset,
            vk_format,
            vk_format,
            width,
            height,
            width);
         transfer_cmd->source_count = 1;
      } else {
         transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

         for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
            transfer_cmd->clear_color[i].ui = fill_data;
      }

      pvr_setup_buffer_surface(&transfer_cmd->dst,
                               &transfer_cmd->scissor,
                               dst_addr,
                               offset + dst_offset,
                               vk_format,
                               vk_format,
                               width,
                               height,
                               width);

      if (transfer_cmd->source_count > 0) {
         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;

         transfer_cmd->sources[0].mapping_count++;
      }

      result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
      if (result != VK_SUCCESS) {
         vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
         return result;
      }

      offset += width * height * texel_width;
   }

   return VK_SUCCESS;
}

void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                         VkBuffer dstBuffer,
                         VkDeviceSize dstOffset,
                         VkDeviceSize dataSize,
                         const void *pData)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   struct pvr_suballoc_bo *pvr_bo;
   VkResult result;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   result = pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize, &pvr_bo);
   if (result != VK_SUCCESS)
      return;

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              pvr_bo->dev_addr,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              dataSize,
                              0U,
                              false);
}

void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                        const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      const VkResult result =
         pvr_cmd_copy_buffer_region(cmd_buffer,
                                    src->dev_addr,
                                    pCopyBufferInfo->pRegions[i].srcOffset,
                                    dst->dev_addr,
                                    pCopyBufferInfo->pRegions[i].dstOffset,
                                    pCopyBufferInfo->pRegions[i].size,
                                    0U,
                                    false);
      if (result != VK_SUCCESS)
         return;
   }
}

void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
                       VkBuffer dstBuffer,
                       VkDeviceSize dstOffset,
                       VkDeviceSize fillSize,
                       uint32_t data)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   fillSize = vk_buffer_range(&dst->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ULL;

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              PVR_DEV_ADDR_INVALID,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              fillSize,
                              data,
                              true);
}

/**
 * \brief Returns the maximum number of layers to clear, starting from
 * base_layer, whose clear rects contain or match the target rectangle.
 *
 * \param[in] target_rect      The region which the clear should contain or
 *                             match.
 * \param[in] base_layer       The layer index to start at.
 * \param[in] clear_rect_count Number of elements in clear_rects.
 * \param[in] clear_rects      Array of clear rects.
 *
 * \return Max number of layers that cover or match the target region.
 */
static uint32_t
pvr_get_max_layers_covering_target(VkRect2D target_rect,
                                   uint32_t base_layer,
                                   uint32_t clear_rect_count,
                                   const VkClearRect *clear_rects)
{
   const int32_t target_x0 = target_rect.offset.x;
   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
   const int32_t target_y0 = target_rect.offset.y;
   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;

   uint32_t layer_count = 0;

   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);

   for (uint32_t i = 0; i < clear_rect_count; i++) {
      const VkClearRect *clear_rect = &clear_rects[i];
      const uint32_t max_layer =
         clear_rect->baseArrayLayer + clear_rect->layerCount;
      bool target_is_covered;
      int32_t x0, x1;
      int32_t y0, y1;

      if (clear_rect->baseArrayLayer == 0)
         continue;

      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
             UINT32_MAX);

      /* Check for layer intersection. */
      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
         continue;

      x0 = clear_rect->rect.offset.x;
      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
      y0 = clear_rect->rect.offset.y;
      y1 = y0 + (int32_t)clear_rect->rect.extent.height;

      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
             INT32_MAX);

      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;

      if (target_is_covered)
         layer_count = MAX2(layer_count, max_layer - base_layer);
   }

   return layer_count;
}


/* Return true if the vertex shader is required to output the render target
 * id to pick the texture array layer.
 */
static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
                             uint32_t rect_count,
                             const VkClearRect *rects)
{
   if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
      return false;

   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
         return true;
   }

   return false;
}

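/* Build the constant shared-register buffer consumed by the static
 * clear-attachment shader: the clear color components, the optional tile
 * buffer address (split into upper/lower 32 bits), and any static constants
 * the shader factory reports, each placed at the register index given by the
 * shader's location map.
 */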
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_shader_factory_info *shader_info,
   const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   ASSERTED bool uses_tile_buffer,
   uint32_t tile_buffer_idx,
   struct pvr_suballoc_bo **const const_shareds_buffer_out)
{
   struct pvr_device *device = cmd_buffer->device;
   struct pvr_suballoc_bo *const_shareds_buffer;
   struct pvr_bo *tile_buffer;
   uint64_t tile_dev_addr;
   uint32_t *buffer;
   VkResult result;

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is
    * fine. Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result =
      pvr_cmd_buffer_alloc_mem(cmd_buffer,
                               device->heaps.general_heap,
                               PVR_DW_TO_BYTES(shader_info->const_shared_regs),
                               &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);

   for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
      uint32_t dest_idx = shader_info->driver_const_location_map[i];

      if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
         continue;

      assert(dest_idx < shader_info->const_shared_regs);

      switch (i) {
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
         buffer[dest_idx] = clear_color[i];
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         buffer[dest_idx] = (uint32_t)tile_dev_addr;
         break;

      default:
         unreachable("Unsupported clear attachment const type.");
      }
   }

   for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
      const struct pvr_static_buffer *static_buff =
         &shader_info->static_const_buffer[i];

      assert(static_buff->dst_idx < shader_info->const_shared_regs);

      buffer[static_buff->dst_idx] = static_buff->value;
   }

   *const_shareds_buffer_out = const_shareds_buffer;

   return VK_SUCCESS;
}

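/* Emit the PPP and PDS state for a static clear-attachment blit: pick the
 * pre-generated clear program variant from the output register count and
 * offset, upload its constant buffer and PDS texture-state data, and pack
 * the PDS state words referenced by the clear template.
 */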
static VkResult pvr_clear_color_attachment_static(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct usc_mrt_resource *mrt_resource,
   VkFormat format,
   uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   uint32_t template_idx,
   uint32_t stencil,
   bool vs_has_rt_id_output)
{
   struct pvr_device *device = cmd_buffer->device;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   ASSERTED const bool has_eight_output_registers =
      PVR_HAS_FEATURE(dev_info, eight_output_registers);
   const struct pvr_device_static_clear_state *dev_clear_state =
      &device->static_clear_state;
   const bool uses_tile_buffer = mrt_resource->type ==
                                 USC_MRT_RESOURCE_TYPE_MEMORY;
   const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
   struct pvr_pds_pixel_shader_sa_program texture_program;
   uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
   const struct pvr_shader_factory_info *shader_info;
   struct pvr_suballoc_bo *pds_texture_program_bo;
   struct pvr_static_clear_ppp_template template;
   struct pvr_suballoc_bo *const_shareds_buffer;
   uint64_t pds_texture_program_addr;
   struct pvr_suballoc_bo *pvr_bo;
   uint32_t tile_buffer_idx = 0;
   uint32_t out_reg_count;
   uint32_t output_offset;
   uint32_t program_idx;
   uint32_t *buffer;
   VkResult result;

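   /* The number of 32-bit outputs (output registers or tile-buffer dwords)
    * the clear shader writes is derived from the size of the PBE accumulation
    * format of the attachment being cleared.
    */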
   out_reg_count =
      DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);

   if (uses_tile_buffer) {
      tile_buffer_idx = mrt_resource->mem.tile_buffer;
      output_offset = mrt_resource->mem.offset_dw;
   } else {
      output_offset = mrt_resource->reg.output_reg;
   }

   assert(has_eight_output_registers || out_reg_count + output_offset <= 4);

   program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
                                                        output_offset,
                                                        uses_tile_buffer);

   shader_info = clear_attachment_collection[program_idx].info;

   result = pvr_clear_color_attachment_static_create_consts_buffer(
      cmd_buffer,
      shader_info,
      clear_color,
      uses_tile_buffer,
      tile_buffer_idx,
      &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   /* clang-format off */
   texture_program = (struct pvr_pds_pixel_shader_sa_program){
      .num_texture_dma_kicks = 1,
      .texture_dma_address = {
         [0] = const_shareds_buffer->dev_addr.addr,
      }
   };
   /* clang-format on */

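   /* Set up the DOUTD DMA so the PDS program uploads the constants buffer
    * into the USC common store ahead of the pixel shader run.
    */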
   pvr_csb_pack (&texture_program.texture_dma_control[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
                 doutd_src1) {
      doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE;
      doutd_src1.bsize = shader_info->const_shared_regs;
   }

   clear_attachment_program =
      &dev_clear_state->pds_clear_attachment_program_info[program_idx];

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
    * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result = pvr_cmd_buffer_alloc_mem(
      cmd_buffer,
      device->heaps.pds_heap,
      clear_attachment_program->texture_program_data_size,
      &pds_texture_program_bo);
   if (result != VK_SUCCESS) {
      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return result;
   }

   buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo);
   pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr -
                              device->heaps.pds_heap->base_addr.addr;

   pvr_pds_generate_pixel_shader_sa_texture_state_data(
      &texture_program,
      buffer,
      &device->pdevice->dev_info);

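   /* Pack the PPP PDS state words pointing at the clear attachment's pixel
    * and texture-state PDS programs, along with their size information.
    */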
   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
                 TA_STATE_PDS_SHADERBASE,
                 shaderbase) {
      shaderbase.addr = clear_attachment_program->pixel_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
                 TA_STATE_PDS_TEXUNICODEBASE,
                 texunicodebase) {
      texunicodebase.addr = clear_attachment_program->texture_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
                 TA_STATE_PDS_SIZEINFO1,
                 sizeinfo1) {
      sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
         clear_attachment_program->texture_program_data_size,
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);

      sizeinfo1.pds_tempsize =
         DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
                      ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
                 TA_STATE_PDS_SIZEINFO2,
                 sizeinfo2) {
      sizeinfo2.usc_sharedsize =
         DIV_ROUND_UP(shader_info->const_shared_regs,
                      ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
   }

   /* Dummy coefficient loading program. */
   pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
                 TA_STATE_PDS_TEXTUREDATABASE,
                 texturedatabase) {
      texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
   }

   assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
   template =
      cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

   template.config.pds_state = &pds_state;

   template.config.ispctl.upass =
      cmd_buffer->state.render_pass_info.isp_userpass;

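   /* The template index doubles as a VkImageAspectFlags mask, so the stencil
    * bit tells us whether this variant needs a stencil reference value.
    */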
   if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* clang-format off */
      template.config.ispa.sref = stencil & ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
      /* clang-format on */
   }

   if (vs_has_rt_id_output) {
      template.config.output_sel.rhw_pres = true;
      template.config.output_sel.render_tgt_pres = true;
      template.config.output_sel.vtxsize = 4 + 1;
   }

   result = pvr_emit_ppp_from_template(
      &cmd_buffer->state.current_sub_cmd->gfx.control_stream,
      &template,
      &pvr_bo);
   if (result != VK_SUCCESS) {
      list_del(&pds_texture_program_bo->link);
      pvr_bo_suballoc_free(pds_texture_program_bo);

      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
   }

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   return VK_SUCCESS;
}

/**
 * \brief Record a deferred clear operation into the command buffer.
 *
 * Devices which don't have gs_rta_support require extra handling for RTA
 * clears. We set up a list of deferred clear transfer commands which is
 * processed at the end of the graphics sub command to account for the missing
 * feature.
 */
static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
                                           const VkClearAttachment *attachment,
                                           const VkClearRect *rect,
                                           bool is_render_init)
{
   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
   struct pvr_transfer_cmd *transfer_cmd_list;
   const struct pvr_image_view *image_view;
   const struct pvr_image *image;
   uint32_t base_layer;

   const VkOffset3D offset = {
      .x = rect->rect.offset.x,
      .y = rect->rect.offset.y,
      .z = 1,
   };
   const VkExtent3D extent = {
      .width = rect->rect.extent.width,
      .height = rect->rect.extent.height,
      .depth = 1,
   };

   assert(
      !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));

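   /* Reserve one deferred transfer command per layer to be cleared. */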
   transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
                                          struct pvr_transfer_cmd,
                                          rect->layerCount);
   if (!transfer_cmd_list) {
      return vk_command_buffer_set_error(&cmd_buffer->vk,
                                         VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
    *
    *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
    *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
    */
   if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
      assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
             attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
             attachment->aspectMask ==
                (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));

      image_view = pass_info->attachments[hw_render->ds_attach_idx];
   } else if (is_render_init) {
      uint32_t index;

      assert(attachment->colorAttachment < hw_render->color_init_count);
      index = hw_render->color_init[attachment->colorAttachment].index;

      image_view = pass_info->attachments[index];
   } else {
      const struct pvr_renderpass_hwsetup_subpass *hw_pass =
         pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
      const struct pvr_render_subpass *sub_pass =
         &pass_info->pass->subpasses[hw_pass->index];
      const uint32_t attachment_idx =
         sub_pass->color_attachments[attachment->colorAttachment];

      assert(attachment->colorAttachment < sub_pass->color_count);

      image_view = pass_info->attachments[attachment_idx];
   }

   base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
   image = vk_to_pvr_image(image_view->vk.image);

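   /* Record one fill transfer command per layer; these are processed when the
    * graphics sub command ends.
    */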
   for (uint32_t i = 0; i < rect->layerCount; i++) {
      struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];

      /* TODO: Add an init function for when we don't want to use
       * pvr_transfer_cmd_alloc(), and use it here.
       */
      *transfer_cmd = (struct pvr_transfer_cmd){
         .flags = PVR_TRANSFER_CMD_FLAGS_FILL,
         .cmd_buffer = cmd_buffer,
         .is_deferred_clear = true,
      };

      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
            transfer_cmd->clear_color[j].ui =
               attachment->clearValue.color.uint32[j];
         }
      } else {
         transfer_cmd->clear_color[0].f =
            attachment->clearValue.depthStencil.depth;
         transfer_cmd->clear_color[1].ui =
            attachment->clearValue.depthStencil.stencil;
      }

      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &transfer_cmd->dst,
                                 &transfer_cmd->scissor,
                                 image,
                                 base_layer + i,
                                 0,
                                 &offset,
                                 &extent,
                                 0.0f,
                                 image->vk.format,
                                 attachment->aspectMask);
   }

   return VK_SUCCESS;
}

static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
                                  uint32_t attachment_count,
                                  const VkClearAttachment *attachments,
                                  uint32_t rect_count,
                                  const VkClearRect *rects,
                                  bool is_render_init)
{
   const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
   const struct pvr_renderpass_hwsetup_subpass *hw_pass =
      pvr_get_hw_subpass(pass, pass_info->subpass_idx);
   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
   struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
   struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
   uint32_t vs_output_size_in_bytes;
   bool vs_has_rt_id_output;

   /* TODO: This function can be optimized so that most of the device memory
    * gets allocated together in one go and then filled as needed. There might
    * also be opportunities to reuse pds code and data segments.
    */

   assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   pvr_reset_graphics_dirty_state(cmd_buffer, false);

   /* We'll be emitting to the control stream. */
   sub_cmd->empty_cmd = false;

   vs_has_rt_id_output =
      pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);

   /* 4 because we're expecting the USC to output X, Y, Z, and W. */
   vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);
   if (vs_has_rt_id_output)
      vs_output_size_in_bytes += PVR_DW_TO_BYTES(1);

   for (uint32_t i = 0; i < attachment_count; i++) {
      const VkClearAttachment *attachment = &attachments[i];
      struct pvr_pds_vertex_shader_program pds_program;
      struct pvr_pds_upload pds_program_upload = { 0 };
      uint64_t current_base_array_layer = ~0;
      VkResult result;
      float depth;

      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
         const struct usc_mrt_resource *mrt_resource;
         uint32_t global_attachment_idx;
         uint32_t local_attachment_idx;
         VkFormat format;

         local_attachment_idx = attachment->colorAttachment;

         if (is_render_init) {
            struct pvr_renderpass_hwsetup_render *hw_render;

            assert(pass->hw_setup->render_count > 0);
            hw_render = &pass->hw_setup->renders[0];

            mrt_resource =
               &hw_render->init_setup.mrt_resources[local_attachment_idx];

            assert(local_attachment_idx < hw_render->color_init_count);
            global_attachment_idx =
               hw_render->color_init[local_attachment_idx].index;
         } else {
            mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];

            assert(local_attachment_idx < sub_pass->color_count);
            global_attachment_idx =
               sub_pass->color_attachments[local_attachment_idx];
         }

         if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
            continue;

         assert(global_attachment_idx < pass->attachment_count);
         format = pass->attachments[global_attachment_idx].vk_format;

         assert(format != VK_FORMAT_UNDEFINED);

         pvr_get_hw_clear_color(format,
                                attachment->clearValue.color,
                                packed_clear_color);

         result = pvr_clear_color_attachment_static(cmd_buffer,
                                                    mrt_resource,
                                                    format,
                                                    packed_clear_color,
                                                    VK_IMAGE_ASPECT_COLOR_BIT,
                                                    0,
                                                    vs_has_rt_id_output);
         if (result != VK_SUCCESS)
            return;
      } else if (hw_pass->z_replicate != -1 &&
                 attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const VkClearColorValue clear_color = {
            .float32 = { [0] = attachment->clearValue.depthStencil.depth, },
         };
         const uint32_t template_idx = attachment->aspectMask |
                                       VK_IMAGE_ASPECT_COLOR_BIT;
         const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
         uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
         const struct usc_mrt_resource *mrt_resource;

         mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];

         pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
                                clear_color,
                                packed_clear_color);

         result = pvr_clear_color_attachment_static(cmd_buffer,
                                                    mrt_resource,
                                                    VK_FORMAT_R32_SFLOAT,
                                                    packed_clear_color,
                                                    template_idx,
                                                    stencil,
                                                    vs_has_rt_id_output);
         if (result != VK_SUCCESS)
            return;
      } else {
         const uint32_t template_idx = attachment->aspectMask;
         struct pvr_static_clear_ppp_template template;
         struct pvr_suballoc_bo *pvr_bo;

         assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
         template =
            cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

         if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            /* clang-format off */
            template.config.ispa.sref =
               attachment->clearValue.depthStencil.stencil &
               ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
            /* clang-format on */
         }

         if (vs_has_rt_id_output) {
            template.config.output_sel.rhw_pres = true;
            template.config.output_sel.render_tgt_pres = true;
            template.config.output_sel.vtxsize = 4 + 1;
         }

         result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
                                             &template,
                                             &pvr_bo);
         if (result != VK_SUCCESS) {
            pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
            return;
         }

         list_add(&pvr_bo->link, &cmd_buffer->bo_list);
      }

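      /* The clear geometry always needs a Z value; when the depth aspect
       * isn't being cleared, fall back to 1.0f.
       */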
      if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         depth = attachment->clearValue.depthStencil.depth;
      else
         depth = 1.0f;

      if (vs_has_rt_id_output) {
         const struct pvr_device_static_clear_state *dev_clear_state =
            &cmd_buffer->device->static_clear_state;
         const struct pvr_suballoc_bo *multi_layer_vert_bo =
            dev_clear_state->usc_multi_layer_vertex_shader_bo;

         /* We can't use the device's passthrough pds program since it doesn't
          * have iterate_instance_id enabled. We'll be uploading code sections
          * per clear rect.
          */

         /* TODO: See if we can allocate all the code section memory in one go.
          * We'd need to make sure that changing instance_id_modifier doesn't
          * change the code section size.
          * Also check if we can reuse the same code segment for each rect.
          * Seems like the instance_id_modifier is written into the data section
          * and used by the pds ADD instruction that way instead of it being
          * embedded into the code section.
          */

         pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
                                                           multi_layer_vert_bo);
      } else {
         /* We can reuse the device's code section but we'll need to upload
          * data sections, so initialize the program.
          */
         pvr_pds_clear_vertex_shader_program_init_base(
            &pds_program,
            cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);

         pds_program_upload.code_offset =
            cmd_buffer->device->static_clear_state.pds.code_offset;
         /* TODO: The code size doesn't get used by pvr_clear_vdm_state() so
          * maybe change its interface to make that clear and not set this?
          */
         pds_program_upload.code_size =
            cmd_buffer->device->static_clear_state.pds.code_size;
      }

      for (uint32_t j = 0; j < rect_count; j++) {
         struct pvr_pds_upload pds_program_data_upload;
         const VkClearRect *clear_rect = &rects[j];
         struct pvr_suballoc_bo *vertices_bo;
         uint32_t vdm_cs_size_in_dw;
         uint32_t *vdm_cs_buffer;
         VkResult result;

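         /* Without gs_rta_support the geometry clear can only target the
          * first array layer, so layered rects are recorded as deferred
          * transfer clears instead; the inline clear below is still emitted
          * when the rect starts at layer 0.
          */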
         if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
             (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
            result = pvr_add_deferred_rta_clear(cmd_buffer,
                                                attachment,
                                                clear_rect,
                                                is_render_init);
            if (result != VK_SUCCESS)
               return;

            if (clear_rect->baseArrayLayer != 0)
               continue;
         }

         /* TODO: Allocate all the buffers in one go before the loop, and add
          * support to multi-alloc bo.
          */
         result = pvr_clear_vertices_upload(cmd_buffer->device,
                                            &clear_rect->rect,
                                            depth,
                                            &vertices_bo);
         if (result != VK_SUCCESS) {
            pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
            return;
         }

         list_add(&vertices_bo->link, &cmd_buffer->bo_list);

         if (vs_has_rt_id_output) {
            if (current_base_array_layer != clear_rect->baseArrayLayer) {
               const uint32_t base_array_layer = clear_rect->baseArrayLayer;
               struct pvr_pds_upload pds_program_code_upload;

               result =
                  pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
                     &pds_program,
                     cmd_buffer,
                     base_array_layer,
                     &pds_program_code_upload);
               if (result != VK_SUCCESS) {
                  pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
                  return;
               }

               pds_program_upload.code_offset =
                  pds_program_code_upload.code_offset;
               /* TODO: The code size doesn't get used by
                * pvr_clear_vdm_state() so maybe change its interface to make
                * that clear and not set this?
                */
               pds_program_upload.code_size = pds_program_code_upload.code_size;

               current_base_array_layer = base_array_layer;
            }

            result =
               pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
                  &pds_program,
                  cmd_buffer,
                  vertices_bo,
                  &pds_program_data_upload);
            if (result != VK_SUCCESS)
               return;
         } else {
            result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
               &pds_program,
               cmd_buffer,
               vertices_bo,
               &pds_program_data_upload);
            if (result != VK_SUCCESS)
               return;
         }

         pds_program_upload.data_offset = pds_program_data_upload.data_offset;
         pds_program_upload.data_size = pds_program_data_upload.data_size;

         vdm_cs_size_in_dw =
            pvr_clear_vdm_state_get_size_in_dw(dev_info,
                                               clear_rect->layerCount);

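         /* Relocation marks bracket the VDM words for the clear draw that are
          * emitted directly into the control stream.
          */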
         pvr_csb_set_relocation_mark(&sub_cmd->control_stream);

         vdm_cs_buffer =
            pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
         if (!vdm_cs_buffer) {
            pvr_cmd_buffer_set_error_unwarned(cmd_buffer,
                                              sub_cmd->control_stream.status);
            return;
         }

         pvr_pack_clear_vdm_state(dev_info,
                                  &pds_program_upload,
                                  pds_program.temps_used,
                                  4,
                                  vs_output_size_in_bytes,
                                  clear_rect->layerCount,
                                  vdm_cs_buffer);

         pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
      }
   }
}

void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer,
                                       const VkClearAttachment *attachment,
                                       const VkClearRect *rect)
{
   pvr_clear_attachments(cmd_buffer, 1, attachment, 1, rect, true);
}

void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                             uint32_t attachmentCount,
                             const VkClearAttachment *pAttachments,
                             uint32_t rectCount,
                             const VkClearRect *pRects)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   /* TODO: There are some optimizations that can be made here:
    * - For a full screen clear, update the clear values for the corresponding
    *   attachment index.
    * - For a full screen color attachment clear, add its index to a load op
    *   override to add it to the background shader. This will elide any load
    *   op loads currently in the background shader as well as the usual
    *   frag kick for geometry clear.
    */

   /* If we have any depth/stencil clears, update the sub command depth/stencil
    * modification and usage flags.
    */
   if (state->depth_format != VK_FORMAT_UNDEFINED) {
      uint32_t full_screen_clear_count;
      bool has_stencil_clear = false;
      bool has_depth_clear = false;

      for (uint32_t i = 0; i < attachmentCount; i++) {
         const VkImageAspectFlags aspect_mask = pAttachments[i].aspectMask;

         if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT)
            has_stencil_clear = true;

         if (aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT)
            has_depth_clear = true;

         if (has_stencil_clear && has_depth_clear)
            break;
      }

      sub_cmd->modifies_stencil |= has_stencil_clear;
      sub_cmd->modifies_depth |= has_depth_clear;

      /* We only care about clears that have a baseArrayLayer of 0 as any
       * attachment clears we move to the background shader must apply to all
       * of the attachment's sub resources.
       */
      full_screen_clear_count =
         pvr_get_max_layers_covering_target(state->render_pass_info.render_area,
                                            0,
                                            rectCount,
                                            pRects);

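      /* A full-screen clear overwrites the previous contents, so an aspect
       * whose usage is still undefined at this point can be marked as never
       * used.
       */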
      if (full_screen_clear_count > 0) {
         if (has_stencil_clear &&
             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }

         if (has_depth_clear &&
             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }
      }
   }

   pvr_clear_attachments(cmd_buffer,
                         attachmentCount,
                         pAttachments,
                         rectCount,
                         pRects,
                         false);
}

void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer,
                          const VkResolveImageInfo2 *pResolveImageInfo)
{
   PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

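   /* Each resolve region is rewritten as a VkImageCopy2 and handed to the
    * shared copy-or-resolve transfer path.
    */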
   for (uint32_t i = 0U; i < pResolveImageInfo->regionCount; i++) {
      VkImageCopy2 region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
         .srcSubresource = pResolveImageInfo->pRegions[i].srcSubresource,
         .srcOffset = pResolveImageInfo->pRegions[i].srcOffset,
         .dstSubresource = pResolveImageInfo->pRegions[i].dstSubresource,
         .dstOffset = pResolveImageInfo->pRegions[i].dstOffset,
         .extent = pResolveImageInfo->pRegions[i].extent,
      };

      VkResult result =
         pvr_copy_or_resolve_color_image_region(cmd_buffer, src, dst, &region);
      if (result != VK_SUCCESS)
         return;
   }
}