• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <stddef.h>
27 #include <stdint.h>
28 #include <vulkan/vulkan.h>
29 
30 #include "pvr_blit.h"
31 #include "pvr_clear.h"
32 #include "pvr_csb.h"
33 #include "pvr_formats.h"
34 #include "pvr_job_transfer.h"
35 #include "pvr_private.h"
36 #include "usc/programs/pvr_shader_factory.h"
37 #include "usc/programs/pvr_static_shaders.h"
38 #include "pvr_types.h"
39 #include "util/bitscan.h"
40 #include "util/list.h"
41 #include "util/macros.h"
42 #include "util/u_math.h"
43 #include "vk_alloc.h"
44 #include "vk_command_buffer.h"
45 #include "vk_command_pool.h"
46 #include "vk_format.h"
47 #include "vk_log.h"
48 
49 /* TODO: Investigate where this limit comes from. */
50 #define PVR_MAX_TRANSFER_SIZE_IN_TEXELS 2048U
51 
52 static struct pvr_transfer_cmd *
pvr_transfer_cmd_alloc(struct pvr_cmd_buffer * cmd_buffer)53 pvr_transfer_cmd_alloc(struct pvr_cmd_buffer *cmd_buffer)
54 {
55    struct pvr_transfer_cmd *transfer_cmd;
56 
57    transfer_cmd = vk_zalloc(&cmd_buffer->vk.pool->alloc,
58                             sizeof(*transfer_cmd),
59                             8U,
60                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
61    if (!transfer_cmd) {
62       vk_command_buffer_set_error(&cmd_buffer->vk, VK_ERROR_OUT_OF_HOST_MEMORY);
63       return NULL;
64    }
65 
66    /* transfer_cmd->mapping_count is already set to zero. */
67    transfer_cmd->sources[0].filter = PVR_FILTER_POINT;
68    transfer_cmd->sources[0].resolve_op = PVR_RESOLVE_BLEND;
69    transfer_cmd->sources[0].addr_mode = ROGUE_TEXSTATE_ADDRMODE_CLAMP_TO_EDGE;
70    transfer_cmd->cmd_buffer = cmd_buffer;
71 
72    return transfer_cmd;
73 }
74 
/* Describe a linear buffer as a transfer surface.
 *
 * The device address is pre-offset, so rect->offset is expected to stay
 * zero (the surface comes from a zero-allocated transfer command). For
 * block-compressed image formats, dimensions and the rectangle are
 * converted from texels to blocks.
 */
static void pvr_setup_buffer_surface(struct pvr_transfer_cmd_surface *surface,
                                     VkRect2D *rect,
                                     pvr_dev_addr_t dev_addr,
                                     VkDeviceSize offset,
                                     VkFormat vk_format,
                                     VkFormat image_format,
                                     uint32_t width,
                                     uint32_t height,
                                     uint32_t stride)
{
   const enum pipe_format pipe_fmt = vk_format_to_pipe_format(image_format);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(dev_addr, offset);
   surface->width = width;
   surface->height = height;
   surface->stride = stride;
   surface->vk_format = vk_format;
   surface->mem_layout = PVR_MEMLAYOUT_LINEAR;
   surface->sample_count = 1;

   /* Only the extent is written here; rect->offset stays at its zeroed
    * value because the device address above already carries the offset.
    */
   rect->extent.width = width;
   rect->extent.height = height;

   if (util_format_is_compressed(pipe_fmt)) {
      const uint32_t blk_w = util_format_get_blockwidth(pipe_fmt);
      const uint32_t blk_h = util_format_get_blockheight(pipe_fmt);

      /* Convert texel dimensions to block dimensions (at least 1 block). */
      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, blk_w));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, blk_h));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, blk_w));

      rect->offset.x /= blk_w;
      rect->offset.y /= blk_h;
      rect->extent.width = MAX2(1U, DIV_ROUND_UP(rect->extent.width, blk_w));
      rect->extent.height = MAX2(1U, DIV_ROUND_UP(rect->extent.height, blk_h));
   }
}
118 
/* Map a format to a same-blocksize UINT format suitable for raw copies,
 * so texel bits are moved verbatim with no conversion.
 */
VkFormat pvr_get_raw_copy_format(VkFormat format)
{
   const uint32_t block_size = vk_format_get_blocksize(format);

   switch (block_size) {
   case 1:
      return VK_FORMAT_R8_UINT;
   case 2:
      return VK_FORMAT_R8G8_UINT;
   case 3:
      return VK_FORMAT_R8G8B8_UINT;
   case 4:
      return VK_FORMAT_R32_UINT;
   case 6:
      return VK_FORMAT_R16G16B16_UINT;
   case 8:
      return VK_FORMAT_R32G32_UINT;
   case 12:
      return VK_FORMAT_R32G32B32_UINT;
   case 16:
      return VK_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unhandled copy block size.");
   }
}
142 
/* Fill out a transfer surface and clip rectangle for one subresource of an
 * image.
 *
 * fdepth selects the z slice: for 3D-twiddled images it is stored as a
 * (possibly fractional) z_position texture coordinate; for all other
 * layouts the integer part is folded into the device address via the
 * subresource's depthPitch.
 *
 * When the image format is block-compressed but the transfer format is
 * not, dimensions and the rectangle are converted from texels to blocks.
 */
static void pvr_setup_transfer_surface(struct pvr_device *device,
                                       struct pvr_transfer_cmd_surface *surface,
                                       VkRect2D *rect,
                                       const struct pvr_image *image,
                                       uint32_t array_layer,
                                       uint32_t mip_level,
                                       const VkOffset3D *offset,
                                       const VkExtent3D *extent,
                                       float fdepth,
                                       VkFormat format,
                                       VkImageAspectFlags aspect_mask)
{
   /* Mip-level dimensions, clamped so they never reach zero. */
   const uint32_t height = MAX2(image->vk.extent.height >> mip_level, 1U);
   const uint32_t width = MAX2(image->vk.extent.width >> mip_level, 1U);
   enum pipe_format image_pformat = vk_format_to_pipe_format(image->vk.format);
   enum pipe_format pformat = vk_format_to_pipe_format(format);
   const VkImageSubresource sub_resource = {
      .aspectMask = aspect_mask,
      .mipLevel = mip_level,
      .arrayLayer = array_layer,
   };
   VkSubresourceLayout info;
   uint32_t depth;

   /* Only 3D-twiddled surfaces carry a real depth; everything else is
    * addressed one slice at a time (see the depthPitch adjustment below).
    */
   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      depth = MAX2(image->vk.extent.depth >> mip_level, 1U);
   else
      depth = 1U;

   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   surface->dev_addr = PVR_DEV_ADDR_OFFSET(image->dev_addr, info.offset);
   surface->width = width;
   surface->height = height;
   surface->depth = depth;

   /* Stride is expressed in texels, so the row pitch must be an exact
    * multiple of the texel size.
    */
   assert(info.rowPitch % vk_format_get_blocksize(format) == 0);
   surface->stride = info.rowPitch / vk_format_get_blocksize(format);

   surface->vk_format = format;
   surface->mem_layout = image->memlayout;
   surface->sample_count = image->vk.samples;

   /* 3D-twiddled: keep the fractional z as a texture coordinate.
    * Otherwise: advance the base address to the selected slice.
    */
   if (image->memlayout == PVR_MEMLAYOUT_3DTWIDDLED)
      surface->z_position = fdepth;
   else
      surface->dev_addr.addr += info.depthPitch * ((uint32_t)fdepth);

   rect->offset.x = offset->x;
   rect->offset.y = offset->y;
   rect->extent.width = extent->width;
   rect->extent.height = extent->height;

   /* Compressed image accessed through an uncompressed transfer format:
    * rescale everything from texel units to block units.
    */
   if (util_format_is_compressed(image_pformat) &&
       !util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(image_pformat);
      uint32_t block_height = util_format_get_blockheight(image_pformat);

      surface->width = MAX2(1U, DIV_ROUND_UP(surface->width, block_width));
      surface->height = MAX2(1U, DIV_ROUND_UP(surface->height, block_height));
      surface->stride = MAX2(1U, DIV_ROUND_UP(surface->stride, block_width));

      rect->offset.x /= block_width;
      rect->offset.y /= block_height;
      rect->extent.width =
         MAX2(1U, DIV_ROUND_UP(rect->extent.width, block_width));
      rect->extent.height =
         MAX2(1U, DIV_ROUND_UP(rect->extent.height, block_height));
   }
}
213 
/* vkCmdBlitImage2 implementation: each region is decomposed per array
 * layer and per destination z slice into individual transfer commands.
 * X/Y flips are handled via mapping flags; z inversion via a negative
 * source z stride.
 */
void pvr_CmdBlitImage2(VkCommandBuffer commandBuffer,
                       const VkBlitImageInfo2 *pBlitImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pBlitImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pBlitImageInfo->dstImage);
   struct pvr_device *device = cmd_buffer->device;
   enum pvr_filter filter = PVR_FILTER_DONTCARE;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   if (pBlitImageInfo->filter == VK_FILTER_LINEAR)
      filter = PVR_FILTER_LINEAR;

   for (uint32_t i = 0U; i < pBlitImageInfo->regionCount; i++) {
      const VkImageBlit2 *region = &pBlitImageInfo->pRegions[i];

      assert(region->srcSubresource.layerCount ==
             region->dstSubresource.layerCount);
      /* Offsets[1] < Offsets[0] on an axis means the blit is mirrored on
       * that axis; normalize to min/max and remember the inversion.
       */
      const bool inverted_dst_z =
         (region->dstOffsets[1].z < region->dstOffsets[0].z);
      const bool inverted_src_z =
         (region->srcOffsets[1].z < region->srcOffsets[0].z);
      const uint32_t min_src_z = inverted_src_z ? region->srcOffsets[1].z
                                                : region->srcOffsets[0].z;
      const uint32_t max_src_z = inverted_src_z ? region->srcOffsets[0].z
                                                : region->srcOffsets[1].z;
      const uint32_t min_dst_z = inverted_dst_z ? region->dstOffsets[1].z
                                                : region->dstOffsets[0].z;
      const uint32_t max_dst_z = inverted_dst_z ? region->dstOffsets[0].z
                                                : region->dstOffsets[1].z;

      const uint32_t src_width =
         region->srcOffsets[1].x - region->srcOffsets[0].x;
      const uint32_t src_height =
         region->srcOffsets[1].y - region->srcOffsets[0].y;
      uint32_t dst_width;
      uint32_t dst_height;

      float initial_depth_offset;
      VkExtent3D src_extent;
      VkExtent3D dst_extent;
      VkOffset3D dst_offset = region->dstOffsets[0];
      float z_slice_stride;
      bool flip_x;
      bool flip_y;

      /* Normalize the destination rectangle: positive extent plus a flip
       * flag per axis, with dst_offset moved to the low corner.
       */
      if (region->dstOffsets[1].x > region->dstOffsets[0].x) {
         dst_width = region->dstOffsets[1].x - region->dstOffsets[0].x;
         flip_x = false;
      } else {
         dst_width = region->dstOffsets[0].x - region->dstOffsets[1].x;
         flip_x = true;
         dst_offset.x = region->dstOffsets[1].x;
      }

      if (region->dstOffsets[1].y > region->dstOffsets[0].y) {
         dst_height = region->dstOffsets[1].y - region->dstOffsets[0].y;
         flip_y = false;
      } else {
         dst_height = region->dstOffsets[0].y - region->dstOffsets[1].y;
         flip_y = true;
         dst_offset.y = region->dstOffsets[1].y;
      }

      /* If any of the extent regions is zero, then reject the blit and
       * continue.
       */
      if (!src_width || !src_height || !dst_width || !dst_height ||
          !(max_dst_z - min_dst_z) || !(max_src_z - min_src_z)) {
         mesa_loge("BlitImage: Region %i has an area of zero", i);
         continue;
      }

      src_extent = (VkExtent3D){
         .width = src_width,
         .height = src_height,
         .depth = 0U,
      };

      dst_extent = (VkExtent3D){
         .width = dst_width,
         .height = dst_height,
         .depth = 0U,
      };

      /* The z_position of a transfer surface is intended to be in the range
       * of 0.0f <= z_position <= depth. It will be used as a texture coordinate
       * in the source surface for cases where linear filtering is enabled, so
       * the fractional part will need to represent the exact midpoint of a z
       * slice range in the source texture, as it maps to each destination
       * slice.
       *
       * For destination surfaces, the fractional part is discarded, so
       * we can safely pass the slice index.
       */

      /* Calculate the ratio of z slices in our source region to that of our
       * destination region, to get the number of z slices in our source region
       * to iterate over for each destination slice.
       *
       * If our destination region is inverted, we iterate backwards.
       */
      z_slice_stride =
         (inverted_dst_z ? -1.0f : 1.0f) *
         ((float)(max_src_z - min_src_z) / (float)(max_dst_z - min_dst_z));

      /* Offset the initial depth offset by half of the z slice stride, into the
       * blit region's z range.
       */
      initial_depth_offset =
         (inverted_dst_z ? max_src_z : min_src_z) + (0.5f * z_slice_stride);

      for (uint32_t j = 0U; j < region->srcSubresource.layerCount; j++) {
         struct pvr_transfer_cmd_surface src_surface = { 0 };
         struct pvr_transfer_cmd_surface dst_surface = { 0 };
         VkRect2D src_rect;
         VkRect2D dst_rect;

         /* Get the subresource info for the src and dst images, this is
          * required when incrementing the address of the depth slice used by
          * the transfer surface.
          */
         VkSubresourceLayout src_info, dst_info;
         const VkImageSubresource src_sub_resource = {
            .aspectMask = region->srcSubresource.aspectMask,
            .mipLevel = region->srcSubresource.mipLevel,
            .arrayLayer = region->srcSubresource.baseArrayLayer + j,
         };
         const VkImageSubresource dst_sub_resource = {
            .aspectMask = region->dstSubresource.aspectMask,
            .mipLevel = region->dstSubresource.mipLevel,
            .arrayLayer = region->dstSubresource.baseArrayLayer + j,
         };

         pvr_get_image_subresource_layout(src, &src_sub_resource, &src_info);
         pvr_get_image_subresource_layout(dst, &dst_sub_resource, &dst_info);

         /* Setup the transfer surfaces once per image layer, which saves us
          * from repeating subresource queries by manually incrementing the
          * depth slices.
          */
         pvr_setup_transfer_surface(device,
                                    &src_surface,
                                    &src_rect,
                                    src,
                                    region->srcSubresource.baseArrayLayer + j,
                                    region->srcSubresource.mipLevel,
                                    &region->srcOffsets[0],
                                    &src_extent,
                                    initial_depth_offset,
                                    src->vk.format,
                                    region->srcSubresource.aspectMask);

         pvr_setup_transfer_surface(device,
                                    &dst_surface,
                                    &dst_rect,
                                    dst,
                                    region->dstSubresource.baseArrayLayer + j,
                                    region->dstSubresource.mipLevel,
                                    &dst_offset,
                                    &dst_extent,
                                    min_dst_z,
                                    dst->vk.format,
                                    region->dstSubresource.aspectMask);

         /* One transfer command per destination z slice; the surface copies
          * are advanced in place after each submit (see the tail of the
          * loop), either via z_position or via a depthPitch address bump.
          */
         for (uint32_t dst_z = min_dst_z; dst_z < max_dst_z; dst_z++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            /* TODO: See if we can allocate all the transfer cmds in one go. */
            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return;

            transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
            transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
            transfer_cmd->sources[0].mappings[0].flip_x = flip_x;
            transfer_cmd->sources[0].mappings[0].flip_y = flip_y;
            transfer_cmd->sources[0].mapping_count++;

            transfer_cmd->sources[0].surface = src_surface;
            transfer_cmd->sources[0].filter = filter;
            transfer_cmd->source_count = 1;

            transfer_cmd->dst = dst_surface;
            transfer_cmd->scissor = dst_rect;

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               /* Ownership was not taken on failure; free it here. */
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return;
            }

            if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED) {
               src_surface.z_position += z_slice_stride;
            } else {
               src_surface.dev_addr.addr +=
                  src_info.depthPitch * ((uint32_t)z_slice_stride);
            }

            if (dst_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
               dst_surface.z_position += 1.0f;
            else
               dst_surface.dev_addr.addr += dst_info.depthPitch;
         }
      }
   }
}
423 
pvr_get_copy_format(VkFormat format)424 static VkFormat pvr_get_copy_format(VkFormat format)
425 {
426    switch (format) {
427    case VK_FORMAT_R8_SNORM:
428       return VK_FORMAT_R8_SINT;
429    case VK_FORMAT_R8G8_SNORM:
430       return VK_FORMAT_R8G8_SINT;
431    case VK_FORMAT_R8G8B8_SNORM:
432       return VK_FORMAT_R8G8B8_SINT;
433    case VK_FORMAT_R8G8B8A8_SNORM:
434       return VK_FORMAT_R8G8B8A8_SINT;
435    case VK_FORMAT_B8G8R8A8_SNORM:
436       return VK_FORMAT_B8G8R8A8_SINT;
437    default:
438       return format;
439    }
440 }
441 
442 static void
pvr_setup_surface_for_image(struct pvr_device * device,struct pvr_transfer_cmd_surface * surface,VkRect2D * rect,const struct pvr_image * image,uint32_t array_layer,uint32_t array_offset,uint32_t mip_level,const VkOffset3D * offset,const VkExtent3D * extent,uint32_t depth,VkFormat format,const VkImageAspectFlags aspect_mask)443 pvr_setup_surface_for_image(struct pvr_device *device,
444                             struct pvr_transfer_cmd_surface *surface,
445                             VkRect2D *rect,
446                             const struct pvr_image *image,
447                             uint32_t array_layer,
448                             uint32_t array_offset,
449                             uint32_t mip_level,
450                             const VkOffset3D *offset,
451                             const VkExtent3D *extent,
452                             uint32_t depth,
453                             VkFormat format,
454                             const VkImageAspectFlags aspect_mask)
455 {
456    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
457       pvr_setup_transfer_surface(device,
458                                  surface,
459                                  rect,
460                                  image,
461                                  array_layer + array_offset,
462                                  mip_level,
463                                  offset,
464                                  extent,
465                                  0.0f,
466                                  format,
467                                  aspect_mask);
468    } else {
469       pvr_setup_transfer_surface(device,
470                                  surface,
471                                  rect,
472                                  image,
473                                  array_layer,
474                                  mip_level,
475                                  offset,
476                                  extent,
477                                  (float)depth,
478                                  format,
479                                  aspect_mask);
480    }
481 }
482 
483 static VkResult
pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer * cmd_buffer,enum pvr_resolve_op resolve_op,const struct pvr_image * src,const struct pvr_image * dst,const VkImageCopy2 * region)484 pvr_copy_or_resolve_image_region(struct pvr_cmd_buffer *cmd_buffer,
485                                  enum pvr_resolve_op resolve_op,
486                                  const struct pvr_image *src,
487                                  const struct pvr_image *dst,
488                                  const VkImageCopy2 *region)
489 {
490    enum pipe_format src_pformat = vk_format_to_pipe_format(src->vk.format);
491    enum pipe_format dst_pformat = vk_format_to_pipe_format(dst->vk.format);
492    bool src_block_compressed = util_format_is_compressed(src_pformat);
493    bool dst_block_compressed = util_format_is_compressed(dst_pformat);
494    VkExtent3D src_extent;
495    VkExtent3D dst_extent;
496    VkFormat dst_format;
497    VkFormat src_format;
498    uint32_t dst_layers;
499    uint32_t src_layers;
500    uint32_t max_slices;
501    uint32_t flags = 0U;
502 
503    if (src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
504        region->srcSubresource.aspectMask !=
505           (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
506       /* Takes the stencil of the source and the depth of the destination and
507        * combines the two interleaved.
508        */
509       flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
510 
511       if (region->srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
512          /* Takes the depth of the source and the stencil of the destination and
513           * combines the two interleaved.
514           */
515          flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
516       }
517    }
518 
519    src_extent = region->extent;
520    dst_extent = region->extent;
521 
522    if (src_block_compressed && !dst_block_compressed) {
523       uint32_t block_width = util_format_get_blockwidth(src_pformat);
524       uint32_t block_height = util_format_get_blockheight(src_pformat);
525 
526       dst_extent.width = MAX2(1U, DIV_ROUND_UP(src_extent.width, block_width));
527       dst_extent.height =
528          MAX2(1U, DIV_ROUND_UP(src_extent.height, block_height));
529    } else if (!src_block_compressed && dst_block_compressed) {
530       uint32_t block_width = util_format_get_blockwidth(dst_pformat);
531       uint32_t block_height = util_format_get_blockheight(dst_pformat);
532 
533       dst_extent.width = MAX2(1U, src_extent.width * block_width);
534       dst_extent.height = MAX2(1U, src_extent.height * block_height);
535    }
536 
537    if (src->vk.samples > dst->vk.samples) {
538       /* Resolve op needs to know the actual format. */
539       dst_format = dst->vk.format;
540    } else {
541       /* We don't care what format dst is as it's guaranteed to be size
542        * compatible with src.
543        */
544       dst_format = pvr_get_raw_copy_format(src->vk.format);
545    }
546    src_format = dst_format;
547 
548    src_layers =
549       vk_image_subresource_layer_count(&src->vk, &region->srcSubresource);
550    dst_layers =
551       vk_image_subresource_layer_count(&dst->vk, &region->dstSubresource);
552 
553    /* srcSubresource.layerCount must match layerCount of dstSubresource in
554     * copies not involving 3D images. In copies involving 3D images, if there is
555     * a 2D image it's layerCount.
556     */
557    max_slices = MAX3(src_layers, dst_layers, region->extent.depth);
558 
559    for (uint32_t i = 0U; i < max_slices; i++) {
560       struct pvr_transfer_cmd *transfer_cmd;
561       VkResult result;
562 
563       transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
564       if (!transfer_cmd)
565          return VK_ERROR_OUT_OF_HOST_MEMORY;
566 
567       transfer_cmd->flags |= flags;
568       transfer_cmd->sources[0].resolve_op = resolve_op;
569 
570       pvr_setup_surface_for_image(
571          cmd_buffer->device,
572          &transfer_cmd->sources[0].surface,
573          &transfer_cmd->sources[0].mappings[0U].src_rect,
574          src,
575          region->srcSubresource.baseArrayLayer,
576          i,
577          region->srcSubresource.mipLevel,
578          &region->srcOffset,
579          &src_extent,
580          region->srcOffset.z + i,
581          src_format,
582          region->srcSubresource.aspectMask);
583 
584       pvr_setup_surface_for_image(cmd_buffer->device,
585                                   &transfer_cmd->dst,
586                                   &transfer_cmd->scissor,
587                                   dst,
588                                   region->dstSubresource.baseArrayLayer,
589                                   i,
590                                   region->dstSubresource.mipLevel,
591                                   &region->dstOffset,
592                                   &dst_extent,
593                                   region->dstOffset.z + i,
594                                   dst_format,
595                                   region->dstSubresource.aspectMask);
596 
597       transfer_cmd->sources[0].mappings[0U].dst_rect = transfer_cmd->scissor;
598       transfer_cmd->sources[0].mapping_count++;
599       transfer_cmd->source_count = 1;
600 
601       result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
602       if (result != VK_SUCCESS) {
603          vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
604          return result;
605       }
606    }
607 
608    return VK_SUCCESS;
609 }
610 
611 VkResult
pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer * cmd_buffer,const struct pvr_image * src,const struct pvr_image * dst,const VkImageCopy2 * region)612 pvr_copy_or_resolve_color_image_region(struct pvr_cmd_buffer *cmd_buffer,
613                                        const struct pvr_image *src,
614                                        const struct pvr_image *dst,
615                                        const VkImageCopy2 *region)
616 {
617    enum pvr_resolve_op resolve_op = PVR_RESOLVE_BLEND;
618 
619    if (src->vk.samples > 1U && dst->vk.samples < 2U) {
620       /* Integer resolve picks a single sample. */
621       if (vk_format_is_int(src->vk.format))
622          resolve_op = PVR_RESOLVE_SAMPLE0;
623    }
624 
625    return pvr_copy_or_resolve_image_region(cmd_buffer,
626                                            resolve_op,
627                                            src,
628                                            dst,
629                                            region);
630 }
631 
pvr_can_merge_ds_regions(const VkImageCopy2 * pRegionA,const VkImageCopy2 * pRegionB)632 static bool pvr_can_merge_ds_regions(const VkImageCopy2 *pRegionA,
633                                      const VkImageCopy2 *pRegionB)
634 {
635    assert(pRegionA->srcSubresource.aspectMask != 0U);
636    assert(pRegionB->srcSubresource.aspectMask != 0U);
637 
638    if (!((pRegionA->srcSubresource.aspectMask ^
639           pRegionB->srcSubresource.aspectMask) &
640          (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
641       return false;
642    }
643 
644    /* Assert if aspectMask mismatch between src and dst, given it's a depth and
645     * stencil image so not multi-planar and from the Vulkan 1.0.223 spec:
646     *
647     *    If neither srcImage nor dstImage has a multi-planar image format then
648     *    for each element of pRegions, srcSubresource.aspectMask and
649     *    dstSubresource.aspectMask must match.
650     */
651    assert(pRegionA->srcSubresource.aspectMask ==
652           pRegionA->dstSubresource.aspectMask);
653    assert(pRegionB->srcSubresource.aspectMask ==
654           pRegionB->dstSubresource.aspectMask);
655 
656    if (!(pRegionA->srcSubresource.mipLevel ==
657             pRegionB->srcSubresource.mipLevel &&
658          pRegionA->srcSubresource.baseArrayLayer ==
659             pRegionB->srcSubresource.baseArrayLayer &&
660          pRegionA->srcSubresource.layerCount ==
661             pRegionB->srcSubresource.layerCount)) {
662       return false;
663    }
664 
665    if (!(pRegionA->dstSubresource.mipLevel ==
666             pRegionB->dstSubresource.mipLevel &&
667          pRegionA->dstSubresource.baseArrayLayer ==
668             pRegionB->dstSubresource.baseArrayLayer &&
669          pRegionA->dstSubresource.layerCount ==
670             pRegionB->dstSubresource.layerCount)) {
671       return false;
672    }
673 
674    if (!(pRegionA->srcOffset.x == pRegionB->srcOffset.x &&
675          pRegionA->srcOffset.y == pRegionB->srcOffset.y &&
676          pRegionA->srcOffset.z == pRegionB->srcOffset.z)) {
677       return false;
678    }
679 
680    if (!(pRegionA->dstOffset.x == pRegionB->dstOffset.x &&
681          pRegionA->dstOffset.y == pRegionB->dstOffset.y &&
682          pRegionA->dstOffset.z == pRegionB->dstOffset.z)) {
683       return false;
684    }
685 
686    if (!(pRegionA->extent.width == pRegionB->extent.width &&
687          pRegionA->extent.height == pRegionB->extent.height &&
688          pRegionA->extent.depth == pRegionB->extent.depth)) {
689       return false;
690    }
691 
692    return true;
693 }
694 
/* vkCmdCopyImage2 implementation. Consecutive D24S8 depth-only and
 * stencil-only regions that are otherwise identical are merged into a
 * single combined depth+stencil copy.
 */
void pvr_CmdCopyImage2(VkCommandBuffer commandBuffer,
                       const VkCopyImageInfo2 *pCopyImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyImageInfo->dstImage);

   /* Merging only applies when both images are interleaved D24S8. */
   const bool can_merge_ds = src->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
                             dst->vk.format == VK_FORMAT_D24_UNORM_S8_UINT;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0U; i < pCopyImageInfo->regionCount; i++) {
      VkResult result;

      /* If an application has split a copy between D24S8 images into two
       * separate copy regions (one for the depth aspect and one for the
       * stencil aspect) attempt to merge the two regions back into one blit.
       *
       * This can only be merged if both regions are identical apart from the
       * aspectMask, one of which has to be depth and the other has to be
       * stencil.
       *
       * Only attempt to merge consecutive regions, ignore the case of merging
       * non-consecutive regions.
       */
      if (can_merge_ds && i != (pCopyImageInfo->regionCount - 1)) {
         const bool ret =
            pvr_can_merge_ds_regions(&pCopyImageInfo->pRegions[i],
                                     &pCopyImageInfo->pRegions[i + 1]);
         if (ret) {
            /* Copy region i and widen it to cover both aspects. */
            VkImageCopy2 region = pCopyImageInfo->pRegions[i];

            region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;
            region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT;

            result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                            src,
                                                            dst,
                                                            &region);
            if (result != VK_SUCCESS)
               return;

            /* Skip the next region as it has been processed with the last
             * region.
             */
            i++;

            continue;
         }
      }

      result =
         pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                src,
                                                dst,
                                                &pCopyImageInfo->pRegions[i]);
      if (result != VK_SUCCESS)
         return;
   }
}
758 
/**
 * \brief Records transfer commands that upload a buffer region into an image,
 * one 2D transfer per depth slice per array layer.
 *
 * \param[in] cmd_buffer      Command buffer to record into.
 * \param[in] buffer_dev_addr Device address of the source buffer.
 * \param[in] image           Destination image.
 * \param[in] region          Buffer-to-image copy region.
 * \param[in] src_format      Format used to read texels from the buffer.
 * \param[in] dst_format      Format used to write texels to the image.
 * \param[in] flags           PVR_TRANSFER_CMD_FLAGS_* applied to every
 *                            transfer command recorded here.
 *
 * \return VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
 */
VkResult
pvr_copy_buffer_to_image_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const struct pvr_image *const image,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format,
                                       const uint32_t flags)
{
   enum pipe_format pformat = vk_format_to_pipe_format(dst_format);
   uint32_t row_length_in_texels;
   uint32_t buffer_slice_size;
   uint32_t buffer_layer_size;
   uint32_t height_in_blks;
   uint32_t row_length;

   /* Per the Vulkan spec, bufferRowLength/bufferImageHeight of zero mean the
    * buffer is tightly packed to imageExtent.
    */
   if (region->bufferRowLength == 0)
      row_length_in_texels = region->imageExtent.width;
   else
      row_length_in_texels = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height_in_blks = region->imageExtent.height;
   else
      height_in_blks = region->bufferImageHeight;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);
      uint32_t block_size = util_format_get_blocksize(pformat);

      /* Convert texel dimensions to compressed-block units. The row length
       * is scaled by the compressed block size here; NOTE(review): this
       * presumes src_format's blocksize (applied below) is 1 byte in the
       * compressed path — confirm against pvr_get_raw_copy_format().
       */
      height_in_blks = DIV_ROUND_UP(height_in_blks, block_height);
      row_length_in_texels =
         DIV_ROUND_UP(row_length_in_texels, block_width) * block_size;
   }

   /* Byte stride between consecutive rows in the buffer. */
   row_length = row_length_in_texels * vk_format_get_blocksize(src_format);

   /* Byte strides between depth slices and between array layers. */
   buffer_slice_size = height_in_blks * row_length;
   buffer_layer_size = buffer_slice_size * region->imageExtent.depth;

   for (uint32_t i = 0; i < region->imageExtent.depth; i++) {
      const uint32_t depth = i + (uint32_t)region->imageOffset.z;

      for (uint32_t j = 0; j < region->imageSubresource.layerCount; j++) {
         /* Buffer address of this layer/slice's data. */
         const VkDeviceSize buffer_offset = region->bufferOffset +
                                            (j * buffer_layer_size) +
                                            (i * buffer_slice_size);
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return VK_ERROR_OUT_OF_HOST_MEMORY;

         transfer_cmd->flags = flags;

         /* Source: the buffer, viewed as a 2D surface of one slice. */
         pvr_setup_buffer_surface(
            &transfer_cmd->sources[0].surface,
            &transfer_cmd->sources[0].mappings[0].src_rect,
            buffer_dev_addr,
            buffer_offset,
            src_format,
            image->vk.format,
            region->imageExtent.width,
            region->imageExtent.height,
            row_length_in_texels);

         transfer_cmd->sources[0].surface.depth = 1;
         transfer_cmd->source_count = 1;

         /* Destination: the selected layer/mip/slice of the image. */
         pvr_setup_transfer_surface(cmd_buffer->device,
                                    &transfer_cmd->dst,
                                    &transfer_cmd->scissor,
                                    image,
                                    region->imageSubresource.baseArrayLayer + j,
                                    region->imageSubresource.mipLevel,
                                    &region->imageOffset,
                                    &region->imageExtent,
                                    depth,
                                    dst_format,
                                    region->imageSubresource.aspectMask);

         transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
         transfer_cmd->sources[0].mapping_count++;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            /* Ownership was not taken on failure; free the command. */
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }
      }
   }

   return VK_SUCCESS;
}
855 
856 VkResult
pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer * const cmd_buffer,const pvr_dev_addr_t buffer_dev_addr,const struct pvr_image * const image,const VkBufferImageCopy2 * const region)857 pvr_copy_buffer_to_image_region(struct pvr_cmd_buffer *const cmd_buffer,
858                                 const pvr_dev_addr_t buffer_dev_addr,
859                                 const struct pvr_image *const image,
860                                 const VkBufferImageCopy2 *const region)
861 {
862    const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
863    VkFormat src_format;
864    VkFormat dst_format;
865    uint32_t flags = 0;
866 
867    if (vk_format_has_depth(image->vk.format) &&
868        vk_format_has_stencil(image->vk.format)) {
869       flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;
870 
871       if ((aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) != 0) {
872          src_format = vk_format_stencil_only(image->vk.format);
873       } else {
874          src_format = vk_format_depth_only(image->vk.format);
875          flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
876       }
877 
878       dst_format = image->vk.format;
879    } else {
880       src_format = pvr_get_raw_copy_format(image->vk.format);
881       dst_format = src_format;
882    }
883 
884    return pvr_copy_buffer_to_image_region_format(cmd_buffer,
885                                                  buffer_dev_addr,
886                                                  image,
887                                                  region,
888                                                  src_format,
889                                                  dst_format,
890                                                  flags);
891 }
892 
void pvr_CmdCopyBufferToImage2(
   VkCommandBuffer commandBuffer,
   const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferToImageInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_image, dst, pCopyBufferToImageInfo->dstImage);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Record each region independently; stop at the first failure (the error
    * is latched on the command buffer by the callee).
    */
   for (uint32_t i = 0U; i < pCopyBufferToImageInfo->regionCount; i++) {
      const VkBufferImageCopy2 *region = &pCopyBufferToImageInfo->pRegions[i];

      if (pvr_copy_buffer_to_image_region(cmd_buffer,
                                          src->dev_addr,
                                          dst,
                                          region) != VK_SUCCESS) {
         return;
      }
   }
}
913 
/**
 * \brief Records transfer commands that copy an image region into buffer
 * memory using explicit source/destination transfer formats.
 *
 * One transfer command is recorded per depth slice per array layer. The
 * destination buffer surface is set up once; its device address is advanced
 * by the slice stride after every blit.
 *
 * \param[in] cmd_buffer      Command buffer to record into.
 * \param[in] image           Source image (must be single-sampled).
 * \param[in] buffer_dev_addr Device address of the destination buffer.
 * \param[in] region          Image-to-buffer copy region.
 * \param[in] src_format      Format used to read texels from the image.
 * \param[in] dst_format      Format used to write texels to the buffer.
 *
 * \return VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
 */
VkResult
pvr_copy_image_to_buffer_region_format(struct pvr_cmd_buffer *const cmd_buffer,
                                       const struct pvr_image *const image,
                                       const pvr_dev_addr_t buffer_dev_addr,
                                       const VkBufferImageCopy2 *const region,
                                       const VkFormat src_format,
                                       const VkFormat dst_format)
{
   enum pipe_format pformat = vk_format_to_pipe_format(image->vk.format);
   struct pvr_transfer_cmd_surface dst_surface = { 0 };
   VkImageSubresource sub_resource;
   uint32_t buffer_image_height;
   uint32_t buffer_row_length;
   uint32_t buffer_slice_size;
   uint32_t max_array_layers;
   VkRect2D dst_rect = { 0 };
   uint32_t max_depth_slice;
   VkSubresourceLayout info;

   /* Only images with VK_SAMPLE_COUNT_1_BIT can be copied to buffer. */
   assert(image->vk.samples == 1);

   /* Per the Vulkan spec, bufferRowLength/bufferImageHeight of zero mean the
    * buffer is tightly packed to imageExtent.
    */
   if (region->bufferRowLength == 0)
      buffer_row_length = region->imageExtent.width;
   else
      buffer_row_length = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buffer_image_height = region->imageExtent.height;
   else
      buffer_image_height = region->bufferImageHeight;

   max_array_layers =
      region->imageSubresource.baseArrayLayer +
      vk_image_subresource_layer_count(&image->vk, &region->imageSubresource);

   /* Byte stride between depth slices in the destination buffer. */
   buffer_slice_size = buffer_image_height * buffer_row_length *
                       vk_format_get_blocksize(dst_format);

   max_depth_slice = region->imageExtent.depth + region->imageOffset.z;

   pvr_setup_buffer_surface(&dst_surface,
                            &dst_rect,
                            buffer_dev_addr,
                            region->bufferOffset,
                            dst_format,
                            image->vk.format,
                            buffer_row_length,
                            buffer_image_height,
                            buffer_row_length);

   dst_rect.extent.width = region->imageExtent.width;
   dst_rect.extent.height = region->imageExtent.height;

   if (util_format_is_compressed(pformat)) {
      uint32_t block_width = util_format_get_blockwidth(pformat);
      uint32_t block_height = util_format_get_blockheight(pformat);

      /* Compressed copies operate on whole blocks; round the destination
       * rect up to block granularity.
       */
      dst_rect.extent.width =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.width, block_width));
      dst_rect.extent.height =
         MAX2(1U, DIV_ROUND_UP(dst_rect.extent.height, block_height));
   }

   sub_resource = (VkImageSubresource){
      .aspectMask = region->imageSubresource.aspectMask,
      .mipLevel = region->imageSubresource.mipLevel,
      .arrayLayer = region->imageSubresource.baseArrayLayer,
   };

   /* Queried for depthPitch, used to step between 3D slices below. */
   pvr_get_image_subresource_layout(image, &sub_resource, &info);

   for (uint32_t i = region->imageSubresource.baseArrayLayer;
        i < max_array_layers;
        i++) {
      struct pvr_transfer_cmd_surface src_surface = { 0 };
      VkRect2D src_rect = { 0 };

      /* Note: Set the depth to the initial depth offset, the memory address (or
       * the z_position) for the depth slice will be incremented manually in the
       * loop below.
       */
      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &src_surface,
                                 &src_rect,
                                 image,
                                 i,
                                 region->imageSubresource.mipLevel,
                                 &region->imageOffset,
                                 &region->imageExtent,
                                 region->imageOffset.z,
                                 src_format,
                                 region->imageSubresource.aspectMask);

      for (uint32_t j = region->imageOffset.z; j < max_depth_slice; j++) {
         struct pvr_transfer_cmd *transfer_cmd;
         VkResult result;

         /* TODO: See if we can allocate all the transfer cmds in one go. */
         transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
         if (!transfer_cmd)
            return vk_error(cmd_buffer->device, VK_ERROR_OUT_OF_HOST_MEMORY);

         transfer_cmd->sources[0].mappings[0].src_rect = src_rect;
         transfer_cmd->sources[0].mappings[0].dst_rect = dst_rect;
         transfer_cmd->sources[0].mapping_count++;

         transfer_cmd->sources[0].surface = src_surface;
         transfer_cmd->source_count = 1;

         transfer_cmd->dst = dst_surface;
         transfer_cmd->scissor = dst_rect;

         result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
         if (result != VK_SUCCESS) {
            /* Ownership was not taken on failure; free the command. */
            vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
            return result;
         }

         /* Advance the buffer address to the next slice's data... */
         dst_surface.dev_addr.addr += buffer_slice_size;

         /* ...and step the source: 3D twiddled surfaces advance via
          * z_position, everything else via the subresource depth pitch.
          */
         if (src_surface.mem_layout == PVR_MEMLAYOUT_3DTWIDDLED)
            src_surface.z_position += 1.0f;
         else
            src_surface.dev_addr.addr += info.depthPitch;
      }
   }

   return VK_SUCCESS;
}
1044 
1045 VkResult
pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer * const cmd_buffer,const struct pvr_image * const image,const pvr_dev_addr_t buffer_dev_addr,const VkBufferImageCopy2 * const region)1046 pvr_copy_image_to_buffer_region(struct pvr_cmd_buffer *const cmd_buffer,
1047                                 const struct pvr_image *const image,
1048                                 const pvr_dev_addr_t buffer_dev_addr,
1049                                 const VkBufferImageCopy2 *const region)
1050 {
1051    const VkImageAspectFlags aspect_mask = region->imageSubresource.aspectMask;
1052 
1053    VkFormat src_format = pvr_get_copy_format(image->vk.format);
1054    VkFormat dst_format;
1055 
1056    /* Color and depth aspect copies can be done using an appropriate raw format.
1057     */
1058    if (aspect_mask & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_DEPTH_BIT)) {
1059       src_format = pvr_get_raw_copy_format(src_format);
1060       dst_format = src_format;
1061    } else if (aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
1062       /* From the Vulkan spec:
1063        *
1064        *    Data copied to or from the stencil aspect of any depth/stencil
1065        *    format is tightly packed with one VK_FORMAT_S8_UINT value per texel.
1066        */
1067       dst_format = VK_FORMAT_S8_UINT;
1068    } else {
1069       /* YUV Planes require specific formats. */
1070       dst_format = src_format;
1071    }
1072 
1073    return pvr_copy_image_to_buffer_region_format(cmd_buffer,
1074                                                  image,
1075                                                  buffer_dev_addr,
1076                                                  region,
1077                                                  src_format,
1078                                                  dst_format);
1079 }
1080 
void pvr_CmdCopyImageToBuffer2(
   VkCommandBuffer commandBuffer,
   const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, src, pCopyImageToBufferInfo->srcImage);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyImageToBufferInfo->dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Record each region independently; stop at the first failure (the error
    * is latched on the command buffer by the callee).
    */
   for (uint32_t i = 0U; i < pCopyImageToBufferInfo->regionCount; i++) {
      const VkResult result =
         pvr_copy_image_to_buffer_region(cmd_buffer,
                                         src,
                                         dst->dev_addr,
                                         &pCopyImageToBufferInfo->pRegions[i]);

      if (result != VK_SUCCESS)
         return;
   }
}
1102 
/* Computes the extent of the given mip level, clamping each dimension to the
 * minimum (4x4x4 for 3D textures, 1x1x1 otherwise).
 */
static void pvr_calc_mip_level_extents(const struct pvr_image *image,
                                       uint16_t mip_level,
                                       VkExtent3D *extent_out)
{
   /* 3D textures are clamped to 4x4x4. */
   const uint32_t min_dim =
      (image->vk.image_type == VK_IMAGE_TYPE_3D) ? 4U : 1U;

   *extent_out = (VkExtent3D){
      .width = MAX2(image->vk.extent.width >> mip_level, min_dim),
      .height = MAX2(image->vk.extent.height >> mip_level, min_dim),
      .depth = MAX2(image->vk.extent.depth >> mip_level, min_dim),
   };
}
1115 
/**
 * \brief Records fill transfer commands clearing every layer, mip level and
 * depth slice in the given subresource range to \p pColor.
 *
 * \param[in] cmd_buffer Command buffer to record into.
 * \param[in] image      Image to clear.
 * \param[in] pColor     Raw clear value, interpreted per the image format.
 * \param[in] psRange    Subresource range to clear.
 * \param[in] flags      Extra PVR_TRANSFER_CMD_FLAGS_* (e.g. DSMERGE/PICKD
 *                       for single-aspect clears of packed depth/stencil).
 *
 * \return VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY.
 */
static VkResult pvr_clear_image_range(struct pvr_cmd_buffer *cmd_buffer,
                                      const struct pvr_image *image,
                                      const VkClearColorValue *pColor,
                                      const VkImageSubresourceRange *psRange,
                                      uint32_t flags)
{
   const uint32_t layer_count =
      vk_image_subresource_layer_count(&image->vk, psRange);
   const uint32_t max_layers = psRange->baseArrayLayer + layer_count;
   VkFormat format = image->vk.format;
   const VkOffset3D offset = { 0 };
   VkExtent3D mip_extent;

   assert((psRange->baseArrayLayer + layer_count) <= image->vk.array_layers);

   for (uint32_t layer = psRange->baseArrayLayer; layer < max_layers; layer++) {
      const uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, psRange);
      const uint32_t max_level = psRange->baseMipLevel + level_count;

      assert((psRange->baseMipLevel + level_count) <= image->vk.mip_levels);

      for (uint32_t level = psRange->baseMipLevel; level < max_level; level++) {
         pvr_calc_mip_level_extents(image, level, &mip_extent);

         /* One fill command per depth slice of this mip level. */
         for (uint32_t depth = 0; depth < mip_extent.depth; depth++) {
            struct pvr_transfer_cmd *transfer_cmd;
            VkResult result;

            transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
            if (!transfer_cmd)
               return VK_ERROR_OUT_OF_HOST_MEMORY;

            transfer_cmd->flags |= flags;
            transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;

            /* Copy the raw clear value into every clear-color channel. */
            for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
               transfer_cmd->clear_color[i].ui = pColor->uint32[i];

            pvr_setup_transfer_surface(cmd_buffer->device,
                                       &transfer_cmd->dst,
                                       &transfer_cmd->scissor,
                                       image,
                                       layer,
                                       level,
                                       &offset,
                                       &mip_extent,
                                       depth,
                                       format,
                                       psRange->aspectMask);

            result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
            if (result != VK_SUCCESS) {
               /* Ownership was not taken on failure; free the command. */
               vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
               return result;
            }
         }
      }
   }

   return VK_SUCCESS;
}
1178 
/* Clears color image subresource ranges outside a render pass by recording
 * one fill per range via pvr_clear_image_range().
 */
void pvr_CmdClearColorImage(VkCommandBuffer commandBuffer,
                            VkImage _image,
                            VkImageLayout imageLayout,
                            const VkClearColorValue *pColor,
                            uint32_t rangeCount,
                            const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   /* Bail out if the command buffer is not in the building state, consistent
    * with the other vkCmd* entry points in this file.
    */
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkResult result =
         pvr_clear_image_range(cmd_buffer, image, pColor, &pRanges[i], 0);
      if (result != VK_SUCCESS)
         return;
   }
}
1196 
/* Clears depth and/or stencil aspects of an image outside a render pass by
 * recording one fill per range via pvr_clear_image_range().
 */
void pvr_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                                   VkImage _image,
                                   VkImageLayout imageLayout,
                                   const VkClearDepthStencilValue *pDepthStencil,
                                   uint32_t rangeCount,
                                   const VkImageSubresourceRange *pRanges)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_image, image, _image);

   /* Bail out if the command buffer is not in the building state, consistent
    * with the other vkCmd* entry points in this file.
    */
   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   for (uint32_t i = 0; i < rangeCount; i++) {
      const VkImageAspectFlags ds_aspect = VK_IMAGE_ASPECT_DEPTH_BIT |
                                           VK_IMAGE_ASPECT_STENCIL_BIT;
      VkClearColorValue clear_ds = { 0 };
      uint32_t flags = 0U;
      VkResult result;

      if (image->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
          pRanges[i].aspectMask != ds_aspect) {
         /* A depth or stencil blit to a packed_depth_stencil requires a merge
          * operation.
          */
         flags |= PVR_TRANSFER_CMD_FLAGS_DSMERGE;

         if (pRanges[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
            flags |= PVR_TRANSFER_CMD_FLAGS_PICKD;
      }

      /* Pack depth into channel 0 and stencil into channel 1 of the raw
       * clear value.
       */
      clear_ds.float32[0] = pDepthStencil->depth;
      clear_ds.uint32[1] = pDepthStencil->stencil;

      result =
         pvr_clear_image_range(cmd_buffer, image, &clear_ds, pRanges + i, flags);
      if (result != VK_SUCCESS)
         return;
   }
}
1234 
pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer * cmd_buffer,pvr_dev_addr_t src_addr,VkDeviceSize src_offset,pvr_dev_addr_t dst_addr,VkDeviceSize dst_offset,VkDeviceSize size,uint32_t fill_data,bool is_fill)1235 static VkResult pvr_cmd_copy_buffer_region(struct pvr_cmd_buffer *cmd_buffer,
1236                                            pvr_dev_addr_t src_addr,
1237                                            VkDeviceSize src_offset,
1238                                            pvr_dev_addr_t dst_addr,
1239                                            VkDeviceSize dst_offset,
1240                                            VkDeviceSize size,
1241                                            uint32_t fill_data,
1242                                            bool is_fill)
1243 {
1244    VkDeviceSize offset = 0;
1245 
1246    while (offset < size) {
1247       const VkDeviceSize remaining_size = size - offset;
1248       struct pvr_transfer_cmd *transfer_cmd;
1249       uint32_t src_align = (src_addr.addr + offset + src_offset) & 0xF;
1250       uint32_t dst_align = (dst_addr.addr + offset + src_offset) & 0xF;
1251       uint32_t texel_width;
1252       VkDeviceSize texels;
1253       VkFormat vk_format;
1254       VkResult result;
1255       uint32_t height;
1256       uint32_t width;
1257 
1258       if (is_fill) {
1259          vk_format = VK_FORMAT_R32_UINT;
1260          texel_width = 4U;
1261       } else if (remaining_size >= 16U && (src_align % 16U) == 0 &&
1262                  (dst_align % 16U) == 0) {
1263          /* Only if address is 128bpp aligned */
1264          vk_format = VK_FORMAT_R32G32B32A32_UINT;
1265          texel_width = 16U;
1266       } else if (remaining_size >= 4U) {
1267          vk_format = VK_FORMAT_R32_UINT;
1268          texel_width = 4U;
1269       } else {
1270          vk_format = VK_FORMAT_R8_UINT;
1271          texel_width = 1U;
1272       }
1273 
1274       texels = remaining_size / texel_width;
1275 
1276       /* Try to do max-width rects, fall back to a 1-height rect for the
1277        * remainder.
1278        */
1279       if (texels > PVR_MAX_TRANSFER_SIZE_IN_TEXELS) {
1280          width = PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
1281          height = texels / PVR_MAX_TRANSFER_SIZE_IN_TEXELS;
1282          height = MIN2(height, PVR_MAX_TRANSFER_SIZE_IN_TEXELS);
1283       } else {
1284          width = texels;
1285          height = 1;
1286       }
1287 
1288       transfer_cmd = pvr_transfer_cmd_alloc(cmd_buffer);
1289       if (!transfer_cmd)
1290          return VK_ERROR_OUT_OF_HOST_MEMORY;
1291 
1292       if (!is_fill) {
1293          pvr_setup_buffer_surface(
1294             &transfer_cmd->sources[0].surface,
1295             &transfer_cmd->sources[0].mappings[0].src_rect,
1296             src_addr,
1297             offset + src_offset,
1298             vk_format,
1299             vk_format,
1300             width,
1301             height,
1302             width);
1303          transfer_cmd->source_count = 1;
1304       } else {
1305          transfer_cmd->flags |= PVR_TRANSFER_CMD_FLAGS_FILL;
1306 
1307          for (uint32_t i = 0; i < ARRAY_SIZE(transfer_cmd->clear_color); i++)
1308             transfer_cmd->clear_color[i].ui = fill_data;
1309       }
1310 
1311       pvr_setup_buffer_surface(&transfer_cmd->dst,
1312                                &transfer_cmd->scissor,
1313                                dst_addr,
1314                                offset + dst_offset,
1315                                vk_format,
1316                                vk_format,
1317                                width,
1318                                height,
1319                                width);
1320 
1321       if (transfer_cmd->source_count > 0) {
1322          transfer_cmd->sources[0].mappings[0].dst_rect = transfer_cmd->scissor;
1323 
1324          transfer_cmd->sources[0].mapping_count++;
1325       }
1326 
1327       result = pvr_cmd_buffer_add_transfer_cmd(cmd_buffer, transfer_cmd);
1328       if (result != VK_SUCCESS) {
1329          vk_free(&cmd_buffer->vk.pool->alloc, transfer_cmd);
1330          return result;
1331       }
1332 
1333       offset += width * height * texel_width;
1334    }
1335 
1336    return VK_SUCCESS;
1337 }
1338 
void pvr_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                         VkBuffer dstBuffer,
                         VkDeviceSize dstOffset,
                         VkDeviceSize dataSize,
                         const void *pData)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   struct pvr_suballoc_bo *staging_bo;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Stage the user data in a device-accessible allocation, then blit it
    * into the destination buffer.
    */
   if (pvr_cmd_buffer_upload_general(cmd_buffer, pData, dataSize,
                                     &staging_bo) != VK_SUCCESS) {
      return;
   }

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              staging_bo->dev_addr,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              dataSize,
                              0U,
                              false);
}
1365 
void pvr_CmdCopyBuffer2(VkCommandBuffer commandBuffer,
                        const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, src, pCopyBufferInfo->srcBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, pCopyBufferInfo->dstBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Record each region independently; stop at the first failure (the error
    * is latched on the command buffer by the callee).
    */
   for (uint32_t i = 0U; i < pCopyBufferInfo->regionCount; i++) {
      const VkBufferCopy2 *region = &pCopyBufferInfo->pRegions[i];

      if (pvr_cmd_copy_buffer_region(cmd_buffer,
                                     src->dev_addr,
                                     region->srcOffset,
                                     dst->dev_addr,
                                     region->dstOffset,
                                     region->size,
                                     0U,
                                     false) != VK_SUCCESS) {
         return;
      }
   }
}
1389 
void pvr_CmdFillBuffer(VkCommandBuffer commandBuffer,
                       VkBuffer dstBuffer,
                       VkDeviceSize dstOffset,
                       VkDeviceSize fillSize,
                       uint32_t data)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   PVR_FROM_HANDLE(pvr_buffer, dst, dstBuffer);
   VkDeviceSize clamped_size;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Resolve VK_WHOLE_SIZE and clamp the range to the buffer's extent. */
   clamped_size = vk_buffer_range(&dst->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   clamped_size &= ~(VkDeviceSize)3;

   pvr_cmd_copy_buffer_region(cmd_buffer,
                              PVR_DEV_ADDR_INVALID,
                              0,
                              dst->dev_addr,
                              dstOffset,
                              clamped_size,
                              data,
                              true);
}
1422 
1423 /**
1424  * \brief Returns the maximum number of layers to clear starting from base_layer
1425  * that contain or match the target rectangle.
1426  *
1427  * \param[in] target_rect      The region which the clear should contain or
1428  *                             match.
1429  * \param[in] base_layer       The layer index to start at.
1430  * \param[in] clear_rect_count Amount of clear_rects
1431  * \param[in] clear_rects      Array of clear rects.
1432  *
1433  * \return Max number of layers that cover or match the target region.
1434  */
static uint32_t
pvr_get_max_layers_covering_target(VkRect2D target_rect,
                                   uint32_t base_layer,
                                   uint32_t clear_rect_count,
                                   const VkClearRect *clear_rects)
{
   /* Target bounds as a half-open box [x0, x1) x [y0, y1). */
   const int32_t target_x0 = target_rect.offset.x;
   const int32_t target_x1 = target_x0 + (int32_t)target_rect.extent.width;
   const int32_t target_y0 = target_rect.offset.y;
   const int32_t target_y1 = target_y0 + (int32_t)target_rect.extent.height;

   uint32_t layer_count = 0;

   /* Guard against signed overflow in the x1/y1 computations above. */
   assert((int64_t)target_x0 + (int64_t)target_rect.extent.width <= INT32_MAX);
   assert((int64_t)target_y0 + (int64_t)target_rect.extent.height <= INT32_MAX);

   for (uint32_t i = 0; i < clear_rect_count; i++) {
      const VkClearRect *clear_rect = &clear_rects[i];
      /* One past the last layer touched by this clear rect. */
      const uint32_t max_layer =
         clear_rect->baseArrayLayer + clear_rect->layerCount;
      bool target_is_covered;
      int32_t x0, x1;
      int32_t y0, y1;

      /* NOTE(review): rects starting at layer 0 are deliberately skipped
       * here — presumably they are handled by the regular (non-RTA) clear
       * path and only rects that begin above layer 0 matter for this count.
       * Confirm against the callers before relying on that.
       */
      if (clear_rect->baseArrayLayer == 0)
         continue;

      /* max_layer above must not have wrapped. */
      assert((uint64_t)clear_rect->baseArrayLayer + clear_rect->layerCount <=
             UINT32_MAX);

      /* Check for layer intersection. */
      if (clear_rect->baseArrayLayer > base_layer || max_layer <= base_layer)
         continue;

      /* Clear rect bounds, same half-open convention as the target. */
      x0 = clear_rect->rect.offset.x;
      x1 = x0 + (int32_t)clear_rect->rect.extent.width;
      y0 = clear_rect->rect.offset.y;
      y1 = y0 + (int32_t)clear_rect->rect.extent.height;

      assert((int64_t)x0 + (int64_t)clear_rect->rect.extent.width <= INT32_MAX);
      assert((int64_t)y0 + (int64_t)clear_rect->rect.extent.height <=
             INT32_MAX);

      /* Covered means the clear rect fully contains (or equals) the target
       * region on both axes.
       */
      target_is_covered = x0 <= target_x0 && x1 >= target_x1;
      target_is_covered &= y0 <= target_y0 && y1 >= target_y1;

      if (target_is_covered)
         layer_count = MAX2(layer_count, max_layer - base_layer);
   }

   return layer_count;
}
1487 
1488 /* Return true if vertex shader is required to output render target id to pick
1489  * the texture array layer.
1490  */
1491 static inline bool
pvr_clear_needs_rt_id_output(struct pvr_device_info * dev_info,uint32_t rect_count,const VkClearRect * rects)1492 pvr_clear_needs_rt_id_output(struct pvr_device_info *dev_info,
1493                              uint32_t rect_count,
1494                              const VkClearRect *rects)
1495 {
1496    if (!PVR_HAS_FEATURE(dev_info, gs_rta_support))
1497       return false;
1498 
1499    for (uint32_t i = 0; i < rect_count; i++) {
1500       if (rects[i].baseArrayLayer != 0 || rects[i].layerCount > 1)
1501          return true;
1502    }
1503 
1504    return false;
1505 }
1506 
/* Allocates and fills the constant shared-register buffer consumed by a
 * static clear-attachment shader: the packed clear color components, the
 * (optional) tile buffer address split into upper/lower 32-bit halves, and
 * the shader's static constants. The placement of each value is dictated by
 * shader_info->driver_const_location_map. On success the suballocated
 * buffer is returned via const_shareds_buffer_out; the caller owns it.
 */
static VkResult pvr_clear_color_attachment_static_create_consts_buffer(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct pvr_shader_factory_info *shader_info,
   const uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   ASSERTED bool uses_tile_buffer,
   uint32_t tile_buffer_idx,
   struct pvr_suballoc_bo **const const_shareds_buffer_out)
{
   struct pvr_device *device = cmd_buffer->device;
   struct pvr_suballoc_bo *const_shareds_buffer;
   struct pvr_bo *tile_buffer;
   uint64_t tile_dev_addr;
   uint32_t *buffer;
   VkResult result;

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
    * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result =
      pvr_cmd_buffer_alloc_mem(cmd_buffer,
                               device->heaps.general_heap,
                               PVR_DW_TO_BYTES(shader_info->const_shared_regs),
                               &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   /* CPU-visible mapping of the freshly allocated buffer. */
   buffer = pvr_bo_suballoc_get_map_addr(const_shareds_buffer);

   /* Scatter each driver-provided constant to the register slot the shader
    * factory assigned for it; unused slots are skipped.
    */
   for (uint32_t i = 0; i < PVR_CLEAR_ATTACHMENT_CONST_COUNT; i++) {
      uint32_t dest_idx = shader_info->driver_const_location_map[i];

      if (dest_idx == PVR_CLEAR_ATTACHMENT_DEST_ID_UNUSED)
         continue;

      assert(dest_idx < shader_info->const_shared_regs);

      switch (i) {
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_0:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_1:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_2:
      case PVR_CLEAR_ATTACHMENT_CONST_COMPONENT_3:
         /* Clear color component index matches the const enum value. */
         buffer[dest_idx] = clear_color[i];
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_UPPER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         /* High 32 bits of the 64-bit tile buffer device address. */
         buffer[dest_idx] = (uint32_t)(tile_dev_addr >> 32);
         break;

      case PVR_CLEAR_ATTACHMENT_CONST_TILE_BUFFER_LOWER:
         assert(uses_tile_buffer);
         tile_buffer = device->tile_buffer_state.buffers[tile_buffer_idx];
         tile_dev_addr = tile_buffer->vma->dev_addr.addr;
         /* Low 32 bits of the tile buffer device address. */
         buffer[dest_idx] = (uint32_t)tile_dev_addr;
         break;

      default:
         unreachable("Unsupported clear attachment const type.");
      }
   }

   /* Append the shader's baked-in static constants at their fixed slots. */
   for (uint32_t i = 0; i < shader_info->num_static_const; i++) {
      const struct pvr_static_buffer *static_buff =
         &shader_info->static_const_buffer[i];

      assert(static_buff->dst_idx < shader_info->const_shared_regs);

      buffer[static_buff->dst_idx] = static_buff->value;
   }

   *const_shareds_buffer_out = const_shareds_buffer;

   return VK_SUCCESS;
}
1583 
/* Emits the PPP state needed to clear one color attachment (or a
 * z-replicated depth value) using the device's static clear shaders:
 * picks the clear-attachment program variant for the attachment's
 * register/tile-buffer placement, uploads its constant buffer and PDS
 * texture-state data, fills in the PDS state words, and emits a PPP
 * template into the current graphics sub command's control stream.
 * template_idx selects the ppp_templates variant and is an aspect-mask
 * style value (a set stencil bit also routes `stencil` into ispa.sref).
 */
static VkResult pvr_clear_color_attachment_static(
   struct pvr_cmd_buffer *cmd_buffer,
   const struct usc_mrt_resource *mrt_resource,
   VkFormat format,
   uint32_t clear_color[static const PVR_CLEAR_COLOR_ARRAY_SIZE],
   uint32_t template_idx,
   uint32_t stencil,
   bool vs_has_rt_id_output)
{
   struct pvr_device *device = cmd_buffer->device;
   ASSERTED const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   ASSERTED const bool has_eight_output_registers =
      PVR_HAS_FEATURE(dev_info, eight_output_registers);
   const struct pvr_device_static_clear_state *dev_clear_state =
      &device->static_clear_state;
   /* MRT data living in memory means the shader writes via a tile buffer
    * rather than output registers.
    */
   const bool uses_tile_buffer = mrt_resource->type ==
                                 USC_MRT_RESOURCE_TYPE_MEMORY;
   const struct pvr_pds_clear_attachment_program_info *clear_attachment_program;
   struct pvr_pds_pixel_shader_sa_program texture_program;
   uint32_t pds_state[PVR_STATIC_CLEAR_PDS_STATE_COUNT];
   const struct pvr_shader_factory_info *shader_info;
   struct pvr_suballoc_bo *pds_texture_program_bo;
   struct pvr_static_clear_ppp_template template;
   struct pvr_suballoc_bo *const_shareds_buffer;
   uint64_t pds_texture_program_addr;
   struct pvr_suballoc_bo *pvr_bo;
   uint32_t tile_buffer_idx = 0;
   uint32_t out_reg_count;
   uint32_t output_offset;
   uint32_t program_idx;
   uint32_t *buffer;
   VkResult result;

   /* Number of 32-bit registers needed for the PBE accumulation format. */
   out_reg_count =
      DIV_ROUND_UP(pvr_get_pbe_accum_format_size_in_bytes(format), 4U);

   if (uses_tile_buffer) {
      tile_buffer_idx = mrt_resource->mem.tile_buffer;
      output_offset = mrt_resource->mem.offset_dw;
   } else {
      output_offset = mrt_resource->reg.output_reg;
   }

   /* Without the eight_output_registers feature only 4 regs are available. */
   assert(has_eight_output_registers || out_reg_count + output_offset <= 4);

   program_idx = pvr_get_clear_attachment_program_index(out_reg_count,
                                                        output_offset,
                                                        uses_tile_buffer);

   shader_info = clear_attachment_collection[program_idx].info;

   result = pvr_clear_color_attachment_static_create_consts_buffer(
      cmd_buffer,
      shader_info,
      clear_color,
      uses_tile_buffer,
      tile_buffer_idx,
      &const_shareds_buffer);
   if (result != VK_SUCCESS)
      return result;

   /* DMA the consts buffer into the common store with a single kick. */
   /* clang-format off */
   texture_program = (struct pvr_pds_pixel_shader_sa_program){
      .num_texture_dma_kicks = 1,
      .texture_dma_address = {
         [0] = const_shareds_buffer->dev_addr.addr,
      }
   };
   /* clang-format on */

   pvr_csb_pack (&texture_program.texture_dma_control[0],
                 PDSINST_DOUT_FIELDS_DOUTD_SRC1,
                 doutd_src1) {
      doutd_src1.dest = ROGUE_PDSINST_DOUTD_DEST_COMMON_STORE;
      doutd_src1.bsize = shader_info->const_shared_regs;
   }

   clear_attachment_program =
      &dev_clear_state->pds_clear_attachment_program_info[program_idx];

   /* TODO: This doesn't need to be aligned to slc size. Alignment to 4 is fine.
    * Change pvr_cmd_buffer_alloc_mem() to take in an alignment?
    */
   result = pvr_cmd_buffer_alloc_mem(
      cmd_buffer,
      device->heaps.pds_heap,
      clear_attachment_program->texture_program_data_size,
      &pds_texture_program_bo);
   if (result != VK_SUCCESS) {
      /* Undo the consts buffer allocation before bailing. */
      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return result;
   }

   buffer = pvr_bo_suballoc_get_map_addr(pds_texture_program_bo);
   /* PDS addresses are heap-relative, so subtract the heap base. */
   pds_texture_program_addr = pds_texture_program_bo->dev_addr.addr -
                              device->heaps.pds_heap->base_addr.addr;

   pvr_pds_generate_pixel_shader_sa_texture_state_data(
      &texture_program,
      buffer,
      &device->pdevice->dev_info);

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SHADERBASE],
                 TA_STATE_PDS_SHADERBASE,
                 shaderbase) {
      shaderbase.addr = clear_attachment_program->pixel_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXUNICODEBASE],
                 TA_STATE_PDS_TEXUNICODEBASE,
                 texunicodebase) {
      texunicodebase.addr = clear_attachment_program->texture_program_offset;
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO1],
                 TA_STATE_PDS_SIZEINFO1,
                 sizeinfo1) {
      sizeinfo1.pds_texturestatesize = DIV_ROUND_UP(
         clear_attachment_program->texture_program_data_size,
         ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEXTURESTATESIZE_UNIT_SIZE);

      sizeinfo1.pds_tempsize =
         DIV_ROUND_UP(clear_attachment_program->texture_program_pds_temps_count,
                      ROGUE_TA_STATE_PDS_SIZEINFO1_PDS_TEMPSIZE_UNIT_SIZE);
   }

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_SIZEINFO2],
                 TA_STATE_PDS_SIZEINFO2,
                 sizeinfo2) {
      sizeinfo2.usc_sharedsize =
         DIV_ROUND_UP(shader_info->const_shared_regs,
                      ROGUE_TA_STATE_PDS_SIZEINFO2_USC_SHAREDSIZE_UNIT_SIZE);
   }

   /* Dummy coefficient loading program. */
   pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_VARYINGBASE] = 0;

   pvr_csb_pack (&pds_state[PVR_STATIC_CLEAR_PPP_PDS_TYPE_TEXTUREDATABASE],
                 TA_STATE_PDS_TEXTUREDATABASE,
                 texturedatabase) {
      texturedatabase.addr = PVR_DEV_ADDR(pds_texture_program_addr);
   }

   assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
   /* Start from the device's canned PPP template and patch it. */
   template =
      cmd_buffer->device->static_clear_state.ppp_templates[template_idx];

   template.config.pds_state = &pds_state;

   template.config.ispctl.upass =
      cmd_buffer->state.render_pass_info.isp_userpass;

   /* template_idx doubles as an aspect mask here; a stencil clear carries
    * the reference value in ispa.sref.
    */
   if (template_idx & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* clang-format off */
      template.config.ispa.sref = stencil & ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
      /* clang-format on */
   }

   if (vs_has_rt_id_output) {
      /* Vertex size grows by one dword for the render target id output. */
      template.config.output_sel.rhw_pres = true;
      template.config.output_sel.render_tgt_pres = true;
      template.config.output_sel.vtxsize = 4 + 1;
   }

   result = pvr_emit_ppp_from_template(
      &cmd_buffer->state.current_sub_cmd->gfx.control_stream,
      &template,
      &pvr_bo);
   if (result != VK_SUCCESS) {
      /* Roll back both suballocations made above. */
      list_del(&pds_texture_program_bo->link);
      pvr_bo_suballoc_free(pds_texture_program_bo);

      list_del(&const_shareds_buffer->link);
      pvr_bo_suballoc_free(const_shareds_buffer);

      return pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
   }

   list_add(&pvr_bo->link, &cmd_buffer->bo_list);

   return VK_SUCCESS;
}
1768 
1769 /**
1770  * \brief Record a deferred clear operation into the command buffer.
1771  *
1772  * Devices which don't have gs_rta_support require extra handling for RTA
1773  * clears. We setup a list of deferred clear transfer commands which will be
1774  * processed at the end of the graphics sub command to account for the missing
1775  * feature.
1776  */
static VkResult pvr_add_deferred_rta_clear(struct pvr_cmd_buffer *cmd_buffer,
                                           const VkClearAttachment *attachment,
                                           const VkClearRect *rect,
                                           bool is_render_init)
{
   struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
   struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
   const struct pvr_renderpass_hwsetup_render *hw_render =
      &pass_info->pass->hw_setup->renders[sub_cmd->hw_render_idx];
   struct pvr_transfer_cmd *transfer_cmd_list;
   const struct pvr_image_view *image_view;
   const struct pvr_image *image;
   uint32_t base_layer;

   /* NOTE(review): .z = 1 looks unusual for an offset (0 would be the
    * typical origin) — presumably pvr_setup_transfer_surface() treats z
    * as one-based or ignores it for 2D fills; confirm before changing.
    */
   const VkOffset3D offset = {
      .x = rect->rect.offset.x,
      .y = rect->rect.offset.y,
      .z = 1,
   };
   const VkExtent3D extent = {
      .width = rect->rect.extent.width,
      .height = rect->rect.extent.height,
      .depth = 1,
   };

   /* This path only exists to work around missing gs_rta_support. */
   assert(
      !PVR_HAS_FEATURE(&cmd_buffer->device->pdevice->dev_info, gs_rta_support));

   /* One deferred transfer (fill) command per layer in the clear rect. */
   transfer_cmd_list = util_dynarray_grow(&cmd_buffer->deferred_clears,
                                          struct pvr_transfer_cmd,
                                          rect->layerCount);
   if (!transfer_cmd_list) {
      return vk_command_buffer_set_error(&cmd_buffer->vk,
                                         VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* Resolve which image view the clear targets: depth/stencil attachment,
    * a render-init color attachment, or the current subpass's color
    * attachment.
    *
    * From the Vulkan 1.3.229 spec VUID-VkClearAttachment-aspectMask-00019:
    *
    *    "If aspectMask includes VK_IMAGE_ASPECT_COLOR_BIT, it must not
    *    include VK_IMAGE_ASPECT_DEPTH_BIT or VK_IMAGE_ASPECT_STENCIL_BIT"
    *
    */
   if (attachment->aspectMask != VK_IMAGE_ASPECT_COLOR_BIT) {
      assert(attachment->aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ||
             attachment->aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT ||
             attachment->aspectMask ==
                (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));

      image_view = pass_info->attachments[hw_render->ds_attach_idx];
   } else if (is_render_init) {
      uint32_t index;

      assert(attachment->colorAttachment < hw_render->color_init_count);
      index = hw_render->color_init[attachment->colorAttachment].index;

      image_view = pass_info->attachments[index];
   } else {
      const struct pvr_renderpass_hwsetup_subpass *hw_pass =
         pvr_get_hw_subpass(pass_info->pass, pass_info->subpass_idx);
      const struct pvr_render_subpass *sub_pass =
         &pass_info->pass->subpasses[hw_pass->index];
      const uint32_t attachment_idx =
         sub_pass->color_attachments[attachment->colorAttachment];

      assert(attachment->colorAttachment < sub_pass->color_count);

      image_view = pass_info->attachments[attachment_idx];
   }

   /* The clear rect's layers are relative to the view's base layer. */
   base_layer = image_view->vk.base_array_layer + rect->baseArrayLayer;
   image = vk_to_pvr_image(image_view->vk.image);

   for (uint32_t i = 0; i < rect->layerCount; i++) {
      struct pvr_transfer_cmd *transfer_cmd = &transfer_cmd_list[i];

      /* TODO: Add an init function for when we don't want to use
       * pvr_transfer_cmd_alloc()? And use it here.
       */
      *transfer_cmd = (struct pvr_transfer_cmd){
         .flags = PVR_TRANSFER_CMD_FLAGS_FILL,
         .cmd_buffer = cmd_buffer,
         .is_deferred_clear = true,
      };

      if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
         /* Color clear: copy all packed components. */
         for (uint32_t j = 0; j < ARRAY_SIZE(transfer_cmd->clear_color); j++) {
            transfer_cmd->clear_color[j].ui =
               attachment->clearValue.color.uint32[j];
         }
      } else {
         /* Depth/stencil clear: depth in slot 0, stencil in slot 1. */
         transfer_cmd->clear_color[0].f =
            attachment->clearValue.depthStencil.depth;
         transfer_cmd->clear_color[1].ui =
            attachment->clearValue.depthStencil.stencil;
      }

      /* Each command fills one array layer of the target image. */
      pvr_setup_transfer_surface(cmd_buffer->device,
                                 &transfer_cmd->dst,
                                 &transfer_cmd->scissor,
                                 image,
                                 base_layer + i,
                                 0,
                                 &offset,
                                 &extent,
                                 0.0f,
                                 image->vk.format,
                                 attachment->aspectMask);
   }

   return VK_SUCCESS;
}
1888 
pvr_clear_attachments(struct pvr_cmd_buffer * cmd_buffer,uint32_t attachment_count,const VkClearAttachment * attachments,uint32_t rect_count,const VkClearRect * rects,bool is_render_init)1889 static void pvr_clear_attachments(struct pvr_cmd_buffer *cmd_buffer,
1890                                   uint32_t attachment_count,
1891                                   const VkClearAttachment *attachments,
1892                                   uint32_t rect_count,
1893                                   const VkClearRect *rects,
1894                                   bool is_render_init)
1895 {
1896    const struct pvr_render_pass *pass = cmd_buffer->state.render_pass_info.pass;
1897    struct pvr_render_pass_info *pass_info = &cmd_buffer->state.render_pass_info;
1898    const struct pvr_renderpass_hwsetup_subpass *hw_pass =
1899       pvr_get_hw_subpass(pass, pass_info->subpass_idx);
1900    struct pvr_sub_cmd_gfx *sub_cmd = &cmd_buffer->state.current_sub_cmd->gfx;
1901    struct pvr_device_info *dev_info = &cmd_buffer->device->pdevice->dev_info;
1902    struct pvr_render_subpass *sub_pass = &pass->subpasses[hw_pass->index];
1903    uint32_t vs_output_size_in_bytes;
1904    bool vs_has_rt_id_output;
1905 
1906    /* TODO: This function can be optimized so that most of the device memory
1907     * gets allocated together in one go and then filled as needed. There might
1908     * also be opportunities to reuse pds code and data segments.
1909     */
1910 
1911    assert(cmd_buffer->state.current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);
1912 
1913    pvr_reset_graphics_dirty_state(cmd_buffer, false);
1914 
1915    /* We'll be emitting to the control stream. */
1916    sub_cmd->empty_cmd = false;
1917 
1918    vs_has_rt_id_output =
1919       pvr_clear_needs_rt_id_output(dev_info, rect_count, rects);
1920 
1921    /* 4 because we're expecting the USC to output X, Y, Z, and W. */
1922    vs_output_size_in_bytes = PVR_DW_TO_BYTES(4);
1923    if (vs_has_rt_id_output)
1924       vs_output_size_in_bytes += PVR_DW_TO_BYTES(1);
1925 
1926    for (uint32_t i = 0; i < attachment_count; i++) {
1927       const VkClearAttachment *attachment = &attachments[i];
1928       struct pvr_pds_vertex_shader_program pds_program;
1929       struct pvr_pds_upload pds_program_upload = { 0 };
1930       uint64_t current_base_array_layer = ~0;
1931       VkResult result;
1932       float depth;
1933 
1934       if (attachment->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT) {
1935          uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1936          const struct usc_mrt_resource *mrt_resource;
1937          uint32_t global_attachment_idx;
1938          uint32_t local_attachment_idx;
1939          VkFormat format;
1940 
1941          local_attachment_idx = attachment->colorAttachment;
1942 
1943          if (is_render_init) {
1944             struct pvr_renderpass_hwsetup_render *hw_render;
1945 
1946             assert(pass->hw_setup->render_count > 0);
1947             hw_render = &pass->hw_setup->renders[0];
1948 
1949             mrt_resource =
1950                &hw_render->init_setup.mrt_resources[local_attachment_idx];
1951 
1952             assert(local_attachment_idx < hw_render->color_init_count);
1953             global_attachment_idx =
1954                hw_render->color_init[local_attachment_idx].index;
1955          } else {
1956             mrt_resource = &hw_pass->setup.mrt_resources[local_attachment_idx];
1957 
1958             assert(local_attachment_idx < sub_pass->color_count);
1959             global_attachment_idx =
1960                sub_pass->color_attachments[local_attachment_idx];
1961          }
1962 
1963          if (global_attachment_idx == VK_ATTACHMENT_UNUSED)
1964             continue;
1965 
1966          assert(global_attachment_idx < pass->attachment_count);
1967          format = pass->attachments[global_attachment_idx].vk_format;
1968 
1969          assert(format != VK_FORMAT_UNDEFINED);
1970 
1971          pvr_get_hw_clear_color(format,
1972                                 attachment->clearValue.color,
1973                                 packed_clear_color);
1974 
1975          result = pvr_clear_color_attachment_static(cmd_buffer,
1976                                                     mrt_resource,
1977                                                     format,
1978                                                     packed_clear_color,
1979                                                     VK_IMAGE_ASPECT_COLOR_BIT,
1980                                                     0,
1981                                                     vs_has_rt_id_output);
1982          if (result != VK_SUCCESS)
1983             return;
1984       } else if (hw_pass->z_replicate != -1 &&
1985                  attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
1986          const VkClearColorValue clear_color = {
1987             .float32 = { [0] = attachment->clearValue.depthStencil.depth, },
1988          };
1989          const uint32_t template_idx = attachment->aspectMask |
1990                                        VK_IMAGE_ASPECT_COLOR_BIT;
1991          const uint32_t stencil = attachment->clearValue.depthStencil.stencil;
1992          uint32_t packed_clear_color[PVR_CLEAR_COLOR_ARRAY_SIZE];
1993          const struct usc_mrt_resource *mrt_resource;
1994 
1995          mrt_resource = &hw_pass->setup.mrt_resources[hw_pass->z_replicate];
1996 
1997          pvr_get_hw_clear_color(VK_FORMAT_R32_SFLOAT,
1998                                 clear_color,
1999                                 packed_clear_color);
2000 
2001          result = pvr_clear_color_attachment_static(cmd_buffer,
2002                                                     mrt_resource,
2003                                                     VK_FORMAT_R32_SFLOAT,
2004                                                     packed_clear_color,
2005                                                     template_idx,
2006                                                     stencil,
2007                                                     vs_has_rt_id_output);
2008          if (result != VK_SUCCESS)
2009             return;
2010       } else {
2011          const uint32_t template_idx = attachment->aspectMask;
2012          struct pvr_static_clear_ppp_template template;
2013          struct pvr_suballoc_bo *pvr_bo;
2014 
2015          assert(template_idx < PVR_STATIC_CLEAR_VARIANT_COUNT);
2016          template =
2017             cmd_buffer->device->static_clear_state.ppp_templates[template_idx];
2018 
2019          if (attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2020             /* clang-format off */
2021             template.config.ispa.sref =
2022                attachment->clearValue.depthStencil.stencil &
2023                   ROGUE_TA_STATE_ISPA_SREF_SIZE_MAX;
2024             /* clang-format on */
2025          }
2026 
2027          if (vs_has_rt_id_output) {
2028             template.config.output_sel.rhw_pres = true;
2029             template.config.output_sel.render_tgt_pres = true;
2030             template.config.output_sel.vtxsize = 4 + 1;
2031          }
2032 
2033          result = pvr_emit_ppp_from_template(&sub_cmd->control_stream,
2034                                              &template,
2035                                              &pvr_bo);
2036          if (result != VK_SUCCESS) {
2037             pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2038             return;
2039          }
2040 
2041          list_add(&pvr_bo->link, &cmd_buffer->bo_list);
2042       }
2043 
2044       if (attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2045          depth = attachment->clearValue.depthStencil.depth;
2046       else
2047          depth = 1.0f;
2048 
2049       if (vs_has_rt_id_output) {
2050          const struct pvr_device_static_clear_state *dev_clear_state =
2051             &cmd_buffer->device->static_clear_state;
2052          const struct pvr_suballoc_bo *multi_layer_vert_bo =
2053             dev_clear_state->usc_multi_layer_vertex_shader_bo;
2054 
2055          /* We can't use the device's passthrough pds program since it doesn't
2056           * have iterate_instance_id enabled. We'll be uploading code sections
2057           * per each clear rect.
2058           */
2059 
2060          /* TODO: See if we can allocate all the code section memory in one go.
2061           * We'd need to make sure that changing instance_id_modifier doesn't
2062           * change the code section size.
2063           * Also check if we can reuse the same code segment for each rect.
2064           * Seems like the instance_id_modifier is written into the data section
2065           * and used by the pds ADD instruction that way instead of it being
2066           * embedded into the code section.
2067           */
2068 
2069          pvr_pds_clear_rta_vertex_shader_program_init_base(&pds_program,
2070                                                            multi_layer_vert_bo);
2071       } else {
2072          /* We can reuse the device's code section but we'll need to upload data
2073           * sections so initialize the program.
2074           */
2075          pvr_pds_clear_vertex_shader_program_init_base(
2076             &pds_program,
2077             cmd_buffer->device->static_clear_state.usc_vertex_shader_bo);
2078 
2079          pds_program_upload.code_offset =
2080             cmd_buffer->device->static_clear_state.pds.code_offset;
2081          /* TODO: The code size doesn't get used by pvr_clear_vdm_state() maybe
2082           * let's change its interface to make that clear and not set this?
2083           */
2084          pds_program_upload.code_size =
2085             cmd_buffer->device->static_clear_state.pds.code_size;
2086       }
2087 
2088       for (uint32_t j = 0; j < rect_count; j++) {
2089          struct pvr_pds_upload pds_program_data_upload;
2090          const VkClearRect *clear_rect = &rects[j];
2091          struct pvr_suballoc_bo *vertices_bo;
2092          uint32_t vdm_cs_size_in_dw;
2093          uint32_t *vdm_cs_buffer;
2094          VkResult result;
2095 
2096          if (!PVR_HAS_FEATURE(dev_info, gs_rta_support) &&
2097              (clear_rect->baseArrayLayer != 0 || clear_rect->layerCount > 1)) {
2098             result = pvr_add_deferred_rta_clear(cmd_buffer,
2099                                                 attachment,
2100                                                 clear_rect,
2101                                                 is_render_init);
2102             if (result != VK_SUCCESS)
2103                return;
2104 
2105             if (clear_rect->baseArrayLayer != 0)
2106                continue;
2107          }
2108 
2109          /* TODO: Allocate all the buffers in one go before the loop, and add
2110           * support to multi-alloc bo.
2111           */
2112          result = pvr_clear_vertices_upload(cmd_buffer->device,
2113                                             &clear_rect->rect,
2114                                             depth,
2115                                             &vertices_bo);
2116          if (result != VK_SUCCESS) {
2117             pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2118             return;
2119          }
2120 
2121          list_add(&vertices_bo->link, &cmd_buffer->bo_list);
2122 
2123          if (vs_has_rt_id_output) {
2124             if (current_base_array_layer != clear_rect->baseArrayLayer) {
2125                const uint32_t base_array_layer = clear_rect->baseArrayLayer;
2126                struct pvr_pds_upload pds_program_code_upload;
2127 
2128                result =
2129                   pvr_pds_clear_rta_vertex_shader_program_create_and_upload_code(
2130                      &pds_program,
2131                      cmd_buffer,
2132                      base_array_layer,
2133                      &pds_program_code_upload);
2134                if (result != VK_SUCCESS) {
2135                   pvr_cmd_buffer_set_error_unwarned(cmd_buffer, result);
2136                   return;
2137                }
2138 
2139                pds_program_upload.code_offset =
2140                   pds_program_code_upload.code_offset;
2141                /* TODO: The code size doesn't get used by pvr_clear_vdm_state()
2142                 * maybe let's change its interface to make that clear and not
2143                 * set this?
2144                 */
2145                pds_program_upload.code_size = pds_program_code_upload.code_size;
2146 
2147                current_base_array_layer = base_array_layer;
2148             }
2149 
2150             result =
2151                pvr_pds_clear_rta_vertex_shader_program_create_and_upload_data(
2152                   &pds_program,
2153                   cmd_buffer,
2154                   vertices_bo,
2155                   &pds_program_data_upload);
2156             if (result != VK_SUCCESS)
2157                return;
2158          } else {
2159             result = pvr_pds_clear_vertex_shader_program_create_and_upload_data(
2160                &pds_program,
2161                cmd_buffer,
2162                vertices_bo,
2163                &pds_program_data_upload);
2164             if (result != VK_SUCCESS)
2165                return;
2166          }
2167 
2168          pds_program_upload.data_offset = pds_program_data_upload.data_offset;
2169          pds_program_upload.data_size = pds_program_data_upload.data_size;
2170 
2171          vdm_cs_size_in_dw =
2172             pvr_clear_vdm_state_get_size_in_dw(dev_info,
2173                                                clear_rect->layerCount);
2174 
2175          pvr_csb_set_relocation_mark(&sub_cmd->control_stream);
2176 
2177          vdm_cs_buffer =
2178             pvr_csb_alloc_dwords(&sub_cmd->control_stream, vdm_cs_size_in_dw);
2179          if (!vdm_cs_buffer) {
2180             pvr_cmd_buffer_set_error_unwarned(cmd_buffer,
2181                                               sub_cmd->control_stream.status);
2182             return;
2183          }
2184 
2185          pvr_pack_clear_vdm_state(dev_info,
2186                                   &pds_program_upload,
2187                                   pds_program.temps_used,
2188                                   4,
2189                                   vs_output_size_in_bytes,
2190                                   clear_rect->layerCount,
2191                                   vdm_cs_buffer);
2192 
2193          pvr_csb_clear_relocation_mark(&sub_cmd->control_stream);
2194       }
2195    }
2196 }
2197 
pvr_clear_attachments_render_init(struct pvr_cmd_buffer * cmd_buffer,const VkClearAttachment * attachment,const VkClearRect * rect)2198 void pvr_clear_attachments_render_init(struct pvr_cmd_buffer *cmd_buffer,
2199                                        const VkClearAttachment *attachment,
2200                                        const VkClearRect *rect)
2201 {
2202    pvr_clear_attachments(cmd_buffer, 1, attachment, 1, rect, true);
2203 }
2204 
void pvr_CmdClearAttachments(VkCommandBuffer commandBuffer,
                             uint32_t attachmentCount,
                             const VkClearAttachment *pAttachments,
                             uint32_t rectCount,
                             const VkClearRect *pRects)
{
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);
   struct pvr_cmd_buffer_state *state = &cmd_buffer->state;
   struct pvr_sub_cmd_gfx *sub_cmd = &state->current_sub_cmd->gfx;

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);
   assert(state->current_sub_cmd->type == PVR_SUB_CMD_TYPE_GRAPHICS);

   /* TODO: Possible optimizations:
    *  - A full screen clear could instead update the clear values for the
    *    corresponding attachment index.
    *  - A full screen color attachment clear could be folded into the
    *    background shader via a load op override, eliding any load op loads
    *    already in the background shader as well as the usual frag kick for
    *    the geometry clear.
    */

   /* When the sub command has a depth/stencil attachment, record on it which
    * of the two aspects the requested clears will modify.
    */
   if (state->depth_format != VK_FORMAT_UNDEFINED) {
      bool clears_stencil = false;
      bool clears_depth = false;

      for (uint32_t attach = 0; attach < attachmentCount; attach++) {
         const VkImageAspectFlags aspects = pAttachments[attach].aspectMask;

         clears_stencil |= !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
         clears_depth |= !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT);

         /* Both aspects already seen; nothing more to learn. */
         if (clears_stencil && clears_depth)
            break;
      }

      sub_cmd->modifies_stencil |= clears_stencil;
      sub_cmd->modifies_depth |= clears_depth;

      /* Only clears with a baseArrayLayer of 0 matter here, as any attachment
       * clears moved to the background shader must apply to all of the
       * attachment's sub resources.
       */
      const uint32_t full_screen_clears = pvr_get_max_layers_covering_target(
         state->render_pass_info.render_area,
         0,
         rectCount,
         pRects);

      if (full_screen_clears > 0) {
         if (clears_stencil &&
             sub_cmd->stencil_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->stencil_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }

         if (clears_depth &&
             sub_cmd->depth_usage == PVR_DEPTH_STENCIL_USAGE_UNDEFINED) {
            sub_cmd->depth_usage = PVR_DEPTH_STENCIL_USAGE_NEVER;
         }
      }
   }

   /* Hand the actual clearing off to the common attachment-clear path
    * (is_render_init = false).
    */
   pvr_clear_attachments(cmd_buffer,
                         attachmentCount,
                         pAttachments,
                         rectCount,
                         pRects,
                         false);
}
2281 
void pvr_CmdResolveImage2(VkCommandBuffer commandBuffer,
                          const VkResolveImageInfo2 *pResolveImageInfo)
{
   PVR_FROM_HANDLE(pvr_image, src, pResolveImageInfo->srcImage);
   PVR_FROM_HANDLE(pvr_image, dst, pResolveImageInfo->dstImage);
   PVR_FROM_HANDLE(pvr_cmd_buffer, cmd_buffer, commandBuffer);

   PVR_CHECK_COMMAND_BUFFER_BUILDING_STATE(cmd_buffer);

   /* Resolve each requested region through the generic copy/resolve transfer
    * path by translating it into an equivalent image-copy region. Stop at the
    * first region that fails; the error has already been recorded on the
    * command buffer by the callee.
    */
   for (uint32_t r = 0U; r < pResolveImageInfo->regionCount; r++) {
      const VkImageResolve2 *resolve = &pResolveImageInfo->pRegions[r];

      VkImageCopy2 copy_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_COPY_2,
         .srcSubresource = resolve->srcSubresource,
         .srcOffset = resolve->srcOffset,
         .dstSubresource = resolve->dstSubresource,
         .dstOffset = resolve->dstOffset,
         .extent = resolve->extent,
      };

      VkResult result = pvr_copy_or_resolve_color_image_region(cmd_buffer,
                                                               src,
                                                               dst,
                                                               &copy_region);
      if (result != VK_SUCCESS)
         return;
   }
}
2307