• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2022 Imagination Technologies Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <limits.h>
26 #include <stdbool.h>
27 #include <stddef.h>
28 #include <string.h>
29 #include <vulkan/vulkan.h>
30 
31 #include "hwdef/rogue_hw_defs.h"
32 #include "hwdef/rogue_hw_utils.h"
33 #include "pvr_hw_pass.h"
34 #include "pvr_formats.h"
35 #include "pvr_private.h"
36 #include "util/bitset.h"
37 #include "util/list.h"
38 #include "util/macros.h"
39 #include "util/u_math.h"
40 #include "vk_alloc.h"
41 #include "vk_format.h"
42 #include "vk_log.h"
43 
/* Internal bookkeeping for one subpass while the hardware renders are being
 * built. Tracks scheduling dependencies between subpasses.
 */
struct pvr_render_int_subpass {
   /* Points to the input subpass. This is set to NULL when the subpass is
    * unscheduled.
    */
   struct pvr_render_subpass *subpass;

   /* Count of other subpasses which have this subpass as a dependency. */
   uint32_t out_subpass_count;

   /* Pointers to the other subpasses which have this subpass as a dependency
    * (presumably out_subpass_count entries — confirm at the allocation site).
    */
   struct pvr_render_int_subpass **out_subpasses;

   /* Count of subpasses on which this subpass is dependent and which haven't
    * been scheduled yet.
    */
   uint32_t in_subpass_count;
};
62 
/* Location of the on-chip storage assigned to a render target: either a
 * range of pixel output registers or a range of dwords in a tile buffer.
 */
struct pvr_renderpass_resource {
   /* Resource type allocated for render target. */
   enum usc_mrt_resource_type type;

   union {
      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
      struct {
         /* The output register to use. */
         uint32_t output_reg;

         /* The offset in bytes within the output register. */
         uint32_t offset;
      } reg;

      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
      struct {
         /* The index of the tile buffer to use. */
         uint32_t tile_buffer;

         /* The offset (in dwords) within the tile buffer. */
         uint32_t offset_dw;
      } mem;
   };
};
87 
/* Internal per-attachment state tracked while renderpass subpasses are
 * scheduled into hardware renders.
 */
struct pvr_render_int_attachment {
   /* Points to the corresponding input attachment. */
   struct pvr_render_pass_attachment *attachment;

   /* True if this attachment is referenced in the currently open render. */
   bool is_used;

   /* Operation to use when this attachment is non-resident and referenced as a
    * color or depth attachment.
    */
   VkAttachmentLoadOp load_op;

   /* Operation to use for the stencil component when this attachment is
    * non-resident and referenced as a color or depth attachment.
    */
   VkAttachmentLoadOp stencil_load_op;

   /* Count of uses of this attachment in unscheduled subpasses. */
   uint32_t remaining_count;

   /* Count of uses of the stencil component of this attachment in unscheduled
    * subpasses.
    */
   uint32_t stencil_remaining_count;

   /* If this attachment has currently allocated on-chip storage then details of
    * the allocated location.
    */
   struct usc_mrt_resource resource;

   /* Index of the subpass in the current render where the attachment is first
    * used. VK_ATTACHMENT_UNUSED if the attachment isn't used in the current
    * render.
    */
   int32_t first_use;

   /* Index of the subpass in the current render where the attachment is last
    * used.
    */
   int32_t last_use;

   /* Index of the subpass (global) where the attachment is last read. */
   int32_t last_read;

   /* If this attachment has currently allocated on-chip storage then the entry
    * in context.active_surf_list.
    */
   struct list_head link;

   /* During pvr_close_render: if this attachment has allocated on-chip storage
    * then the index in pvr_renderpass_hwsetup_render.eot_setup.mrt_resources
    * with details of the storage location. Otherwise -1.
    */
   int32_t mrt_idx;

   /* Index of the last render where the attachment was the source of an MSAA
    * resolve.
    */
   int32_t last_resolve_src_render;

   /* Index of the last render where the attachment was the destination of an
    * MSAA resolve.
    */
   int32_t last_resolve_dst_render;

   /* true if the attachment is used with a z replicate in the current render.
    */
   bool z_replicate;

   /* true if this attachment can be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* true if this attachment requires an EOT attachment. */
   bool eot_surf_required;
};
163 
/* Which parts of the output registers/a tile buffer are currently allocated. */
struct pvr_renderpass_alloc_buffer {
   /* Bit array. A bit is set if the corresponding dword is allocated.
    * Sized for up to 8 * BITSET_WORDBITS dwords of on-chip storage.
    */
   BITSET_DECLARE(allocs, 8U);
};
169 
/* Tracks all on-chip color storage (output registers and tile buffers)
 * allocated at some point during scheduling.
 */
struct pvr_renderpass_alloc {
   /* Which pixel output registers are allocated. */
   struct pvr_renderpass_alloc_buffer output_reg;

   /* Range of allocated output registers. */
   uint32_t output_regs_count;

   /* Number of tile buffers allocated. */
   uint32_t tile_buffers_count;

   /* Which parts of each tile buffer are allocated. Length is
    * tile_buffers_count.
    */
   struct pvr_renderpass_alloc_buffer *tile_buffers;
};
185 
/* Per-subpass state within the render currently being constructed. */
struct pvr_renderpass_subpass {
   /* A pointer to the input subpass description. */
   struct pvr_render_subpass *input_subpass;

   /* true if the depth attachment for this subpass has z replication enabled.
    */
   bool z_replicate;

   /* Which pixel output registers/tile buffer locations are allocated during
    * this subpass.
    */
   struct pvr_renderpass_alloc alloc;
};
199 
/* Top-level state for converting a Vulkan render pass into hardware renders.
 */
struct pvr_renderpass_context {
   /* Internal information about each input attachment. */
   struct pvr_render_int_attachment *int_attach;

   /* Internal information about each input subpass. */
   struct pvr_render_int_subpass *int_subpasses;

   /* Input structure. */
   struct pvr_render_pass *pass;

   /* Output structure. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /* In-progress render. */
   struct pvr_renderpass_hwsetup_render *hw_render;

   /* Information about each subpass in the current render. */
   struct pvr_renderpass_subpass *subpasses;

   /* Which parts of color storage are currently allocated. */
   struct pvr_renderpass_alloc alloc;

   /* Attachment which is currently allocated the on-chip depth/stencil. */
   struct pvr_render_int_attachment *int_ds_attach;

   /* Attachment which is loaded into the on-chip depth/stencil at the start of
    * the render.
    */
   struct pvr_render_int_attachment *ds_load_surface;

   /* Attachment which the depth/stencil attachment should be resolved to at the
    * end of the render.
    */
   struct pvr_render_int_attachment *ds_resolve_surface;

   /* Count of surfaces which are allocated on-chip color storage. */
   uint32_t active_surfaces;

   /* List of attachment/ranges which are allocated on-chip color storage. */
   struct list_head active_surf_list;

   /* Host allocator used for all transient and output allocations. */
   const VkAllocationCallbacks *allocator;
};
243 
/* Storage destinations chosen for a subpass' outputs.
 * NOTE(review): field semantics inferred from names — confirm against users.
 */
struct pvr_render_int_subpass_dsts {
   /* Per-color-attachment destinations; presumably one entry per color
    * attachment of the subpass.
    */
   struct pvr_renderpass_resource *color;

   /* Destination for an incoming z-replicate value. */
   struct pvr_renderpass_resource incoming_zrep;

   /* Destination for an already-existing z-replicate value. */
   struct pvr_renderpass_resource existing_zrep;
};
249 
/* Depth/stencil attachment relationships considered when merging a subpass
 * into the current render.
 * NOTE(review): field semantics inferred from names — confirm against users.
 */
struct pvr_render_subpass_depth_params {
   /* true if the render's existing depth/stencil attachment is also an input
    * attachment of the subpass.
    */
   bool existing_ds_is_input;

   /* true if the subpass' depth/stencil attachment is also one of its input
    * attachments.
    */
   bool incoming_ds_is_input;

   /* Attachment index of the render's existing depth/stencil attachment. */
   uint32_t existing_ds_attach;
};
255 
/* First-use tracking for one buffer (the output registers or one tile
 * buffer).
 */
struct pvr_renderpass_storage_firstuse_buffer {
   /* For each pixel output register/tile buffer location: true if the output
    * register has been allocated in the current render.
    */
   bool used[8U];
};
262 
/* First-use tracking for all color storage in a render; used to decide
 * whether attachment initialization can happen at the render level.
 */
struct pvr_renderpass_storage_firstuse {
   /* First use information for pixel output registers. */
   struct pvr_renderpass_storage_firstuse_buffer output_reg;

   /* First use information for tile buffers. Length is the context's
    * tile_buffers_count; may be NULL when no tile buffers are in use.
    */
   struct pvr_renderpass_storage_firstuse_buffer *tile_buffers;
};
270 
pvr_get_accum_format_bitsize(VkFormat vk_format)271 static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
272 {
273    if (util_format_has_depth(vk_format_description(vk_format)))
274       return vk_format_get_blocksizebits(vk_format);
275 
276    if (!vk_format_has_stencil(vk_format))
277        return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;
278 
279    return 0;
280 }
281 
282 /** Copy information about allocated color storage. */
pvr_copy_alloc(struct pvr_renderpass_context * ctx,struct pvr_renderpass_alloc * dst,struct pvr_renderpass_alloc * src)283 static VkResult pvr_copy_alloc(struct pvr_renderpass_context *ctx,
284                                struct pvr_renderpass_alloc *dst,
285                                struct pvr_renderpass_alloc *src)
286 {
287    dst->output_reg = src->output_reg;
288    dst->output_regs_count = src->output_regs_count;
289 
290    dst->tile_buffers_count = src->tile_buffers_count;
291    if (dst->tile_buffers_count > 0U) {
292       dst->tile_buffers =
293          vk_alloc(ctx->allocator,
294                   sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count,
295                   8,
296                   VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
297       if (!dst->tile_buffers)
298          return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
299 
300       memcpy(dst->tile_buffers,
301              src->tile_buffers,
302              sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count);
303    } else {
304       dst->tile_buffers = NULL;
305    }
306 
307    return VK_SUCCESS;
308 }
309 
310 /** Free information about allocated color storage. */
pvr_free_alloc(struct pvr_renderpass_context * ctx,struct pvr_renderpass_alloc * alloc)311 static void pvr_free_alloc(struct pvr_renderpass_context *ctx,
312                            struct pvr_renderpass_alloc *alloc)
313 {
314    if (alloc->tile_buffers)
315       vk_free(ctx->allocator, alloc->tile_buffers);
316 
317    memset(alloc, 0U, sizeof(*alloc));
318 }
319 
pvr_reset_render(struct pvr_renderpass_context * ctx)320 static void pvr_reset_render(struct pvr_renderpass_context *ctx)
321 {
322    ctx->int_ds_attach = NULL;
323    ctx->active_surfaces = 0U;
324    list_inithead(&ctx->active_surf_list);
325 
326    memset(&ctx->alloc.output_reg, 0U, sizeof(ctx->alloc.output_reg));
327    ctx->alloc.output_regs_count = 0U;
328    ctx->alloc.tile_buffers_count = 0U;
329    ctx->alloc.tile_buffers = NULL;
330 
331    ctx->hw_render = NULL;
332    ctx->subpasses = NULL;
333    ctx->ds_load_surface = NULL;
334 }
335 
336 /** Gets the amount of memory to allocate per-core for a tile buffer. */
337 static uint32_t
pvr_get_tile_buffer_size_per_core(const struct pvr_device * device)338 pvr_get_tile_buffer_size_per_core(const struct pvr_device *device)
339 {
340    uint32_t clusters =
341       PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, num_clusters, 1U);
342 
343    /* Round the number of clusters up to the next power of two. */
344    if (!PVR_HAS_FEATURE(&device->pdevice->dev_info, tile_per_usc))
345       clusters = util_next_power_of_two(clusters);
346 
347    /* Tile buffer is (total number of partitions across all clusters) * 16 * 16
348     * (quadrant size in pixels).
349     */
350    return device->pdevice->dev_runtime_info.total_reserved_partition_size *
351           clusters * sizeof(uint32_t);
352 }
353 
354 /**
355  * Gets the amount of memory to allocate for a tile buffer on the current BVNC.
356  */
pvr_get_tile_buffer_size(const struct pvr_device * device)357 uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device)
358 {
359    /* On a multicore system duplicate the buffer for each core. */
360    return pvr_get_tile_buffer_size_per_core(device) *
361           rogue_get_max_num_cores(&device->pdevice->dev_info);
362 }
363 
364 static void
pvr_finalise_mrt_setup(const struct pvr_device * device,struct pvr_renderpass_hwsetup_render * hw_render,struct usc_mrt_setup * mrt)365 pvr_finalise_mrt_setup(const struct pvr_device *device,
366                        struct pvr_renderpass_hwsetup_render *hw_render,
367                        struct usc_mrt_setup *mrt)
368 {
369    mrt->num_output_regs = hw_render->output_regs_count;
370    mrt->num_tile_buffers = hw_render->tile_buffers_count;
371    mrt->tile_buffer_size = pvr_get_tile_buffer_size(device);
372 }
373 
374 /**
375  * Copy information about the number of pixel output registers and tile buffers
376  * required for the current render to the output structure.
377  */
pvr_finalise_po_alloc(const struct pvr_device * device,struct pvr_renderpass_context * ctx)378 static void pvr_finalise_po_alloc(const struct pvr_device *device,
379                                   struct pvr_renderpass_context *ctx)
380 {
381    struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
382 
383    /* The number of output registers must be a power of two. */
384    hw_render->output_regs_count =
385       util_next_power_of_two(ctx->alloc.output_regs_count);
386 
387    assert(ctx->alloc.tile_buffers_count <= ctx->pass->max_tilebuffer_count);
388    hw_render->tile_buffers_count = ctx->alloc.tile_buffers_count;
389 
390    /* Copy the number of output registers and tile buffers to each subpass. */
391    for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
392       struct pvr_renderpass_hwsetup_subpass *hw_subpass =
393          &hw_render->subpasses[i];
394 
395       pvr_finalise_mrt_setup(device, hw_render, &hw_subpass->setup);
396    }
397 
398    pvr_finalise_mrt_setup(device, hw_render, &hw_render->init_setup);
399    pvr_finalise_mrt_setup(device, hw_render, &hw_render->eot_setup);
400 }
401 
402 /** Mark that device memory must be allocated for an attachment. */
pvr_mark_surface_alloc(struct pvr_renderpass_context * ctx,struct pvr_render_int_attachment * int_attach)403 static void pvr_mark_surface_alloc(struct pvr_renderpass_context *ctx,
404                                    struct pvr_render_int_attachment *int_attach)
405 {
406    const uint32_t attach_idx = int_attach - ctx->int_attach;
407 
408    assert(attach_idx < ctx->pass->attachment_count);
409    ctx->hw_setup->surface_allocate[attach_idx] = true;
410 }
411 
412 /**
413  * Check if there is space in a buffer for storing a render target of a
414  * specified size.
415  */
416 static int32_t
pvr_is_space_in_buffer(const struct pvr_device_info * dev_info,struct pvr_renderpass_alloc_buffer * buffer,uint32_t pixel_size)417 pvr_is_space_in_buffer(const struct pvr_device_info *dev_info,
418                        struct pvr_renderpass_alloc_buffer *buffer,
419                        uint32_t pixel_size)
420 {
421    const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
422    uint32_t alignment = 1U;
423 
424    if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
425       /* For a 64-bit/128-bit source format: the start offset must be even. */
426       if (pixel_size == 2U || pixel_size == 4U)
427          alignment = 2U;
428    }
429 
430    assert(pixel_size <= max_out_regs);
431 
432    for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
433       if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U))
434          return i;
435    }
436 
437    return -1;
438 }
439 
/**
 * Decide whether an attachment's on-chip storage should be initialized at the
 * render level (rather than per-subpass) and, if so, record the load/clear in
 * hw_render->color_init and its destination in hw_render->init_setup.
 *
 * Render-level initialization is only used when this attachment is the first
 * user of every register/tile-buffer dword assigned to it in this render and
 * the load op isn't DONT_CARE. On return *use_render_init tells the caller
 * whether to skip the subpass-level initialization.
 */
static VkResult
pvr_surface_setup_render_init(struct pvr_renderpass_context *ctx,
                              struct pvr_renderpass_storage_firstuse *first_use,
                              struct usc_mrt_resource const *resource,
                              struct pvr_render_pass_attachment *attachment,
                              VkAttachmentLoadOp load_op,
                              bool *use_render_init)
{
   /* Size of the attachment's accumulation format in dwords. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse_buffer *buffer;
   uint32_t start;

   /* Check if this is the first use of all the allocated registers. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      buffer = &first_use->output_reg;
      start = resource->reg.output_reg;
   } else {
      assert(resource->mem.tile_buffer < ctx->alloc.tile_buffers_count);
      buffer = &first_use->tile_buffers[resource->mem.tile_buffer];
      start = resource->mem.offset_dw;
   }

   *use_render_init = true;
   for (uint32_t i = 0U; i < pixel_size; i++) {
      /* Don't initialize at the render level if the output registers were
       * previously allocated a different attachment.
       */
      if (buffer->used[start + i])
         *use_render_init = false;

      /* Don't use render init for future attachments allocated to the same
       * registers.
       */
      buffer->used[start + i] = true;
   }

   if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
      *use_render_init = false;

   if (*use_render_init) {
      struct pvr_renderpass_colorinit *new_color_init;
      struct usc_mrt_resource *new_mrt;

      /* Initialize the storage at the start of the render: grow the
       * color_init array by one entry for this attachment.
       */
      new_color_init = vk_realloc(ctx->allocator,
                                  hw_render->color_init,
                                  sizeof(hw_render->color_init[0U]) *
                                     (hw_render->color_init_count + 1U),
                                  8U,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_color_init)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->color_init = new_color_init;
      hw_render->color_init[hw_render->color_init_count].index =
         attachment->index;
      hw_render->color_init[hw_render->color_init_count].op = load_op;

      /* Set the destination for the attachment load/clear. init_setup's MRT
       * array is kept parallel to color_init.
       */
      assert(hw_render->init_setup.num_render_targets ==
             hw_render->color_init_count);

      new_mrt = vk_realloc(ctx->allocator,
                           hw_render->init_setup.mrt_resources,
                           sizeof(hw_render->init_setup.mrt_resources[0U]) *
                              (hw_render->init_setup.num_render_targets + 1U),
                           8U,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->init_setup.mrt_resources = new_mrt;
      hw_render->init_setup
         .mrt_resources[hw_render->init_setup.num_render_targets] = *resource;
      hw_render->init_setup.num_render_targets++;

      hw_render->color_init_count++;
   }

   return VK_SUCCESS;
}
523 
/**
 * Walk the subpasses of the current render and hoist attachment
 * initialization (loads/clears) from the subpass level to the render level
 * where possible.
 *
 * The first subpass to clear depth/stencil promotes the clear to the render's
 * depth_init/stencil_init. For color attachments, the first subpass to use an
 * attachment may have its load/clear moved into hw_render->color_init via
 * pvr_surface_setup_render_init().
 */
static VkResult
pvr_subpass_setup_render_init(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse first_use = { 0 };
   bool first_ds = true;
   VkResult result;

   /* Per-dword first-use tracking for each tile buffer in use. */
   if (ctx->alloc.tile_buffers_count > 0U) {
      first_use.tile_buffers = vk_zalloc(ctx->allocator,
                                         sizeof(first_use.tile_buffers[0U]) *
                                            ctx->alloc.tile_buffers_count,
                                         8,
                                         VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!first_use.tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If this is the first depth attachment in the render then clear at the
       * render level, not the subpass level.
       */
      if (first_ds &&
          (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR ||
           hw_subpass->stencil_clear)) {
         struct pvr_render_int_attachment *int_ds_attach;

         assert(input_subpass->depth_stencil_attachment !=
                VK_ATTACHMENT_UNUSED);
         assert(input_subpass->depth_stencil_attachment <
                ctx->pass->attachment_count);
         int_ds_attach =
            &ctx->int_attach[input_subpass->depth_stencil_attachment];

         /* The render must use a single depth/stencil attachment. */
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx == int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;

         if (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)
            hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_CLEAR;

         if (hw_subpass->stencil_clear) {
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
            hw_subpass->stencil_clear = false;
         }
      }

      if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
         first_ds = false;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         struct usc_mrt_resource *mrt = &hw_subpass->setup.mrt_resources[j];
         const uint32_t attach_idx = input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;

         if (attach_idx == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[attach_idx];

         assert(pvr_get_accum_format_bitsize(
                   int_attach->attachment->vk_format) > 0U);

         /* Is this the first use of the attachment? */
         if (int_attach->first_use == (int32_t)i) {
            /* Set if we should initialize the attachment storage at the
             * render level.
             */
            bool use_render_init;
            result = pvr_surface_setup_render_init(ctx,
                                                   &first_use,
                                                   mrt,
                                                   int_attach->attachment,
                                                   hw_subpass->color_initops[j],
                                                   &use_render_init);
            if (result != VK_SUCCESS) {
               vk_free(ctx->allocator, first_use.tile_buffers);
               return result;
            }

            /* On success don't initialize the attachment at the subpass level.
             */
            if (use_render_init)
               hw_subpass->color_initops[j] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         } else {
            /* This attachment is already present in on-chip storage so don't
             * do anything.
             */
            assert(hw_subpass->color_initops[j] ==
                   VK_ATTACHMENT_LOAD_OP_DONT_CARE);
         }
      }
   }

   vk_free(ctx->allocator, first_use.tile_buffers);

   return VK_SUCCESS;
}
627 
628 static void
pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer * buffer,uint32_t start,uint32_t pixel_size)629 pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer *buffer,
630                                      uint32_t start,
631                                      uint32_t pixel_size)
632 {
633    assert(!BITSET_TEST_RANGE(buffer->allocs, start, start + pixel_size - 1U));
634    BITSET_SET_RANGE(buffer->allocs, start, start + pixel_size - 1U);
635 }
636 
/**
 * Record that the storage described by resource is now in use by attachment,
 * updating the allocation bitmaps and counts in alloc.
 *
 * For memory resources this grows the tile-buffer tracking array on demand
 * (zero-initializing the new entries). Can therefore fail with
 * VK_ERROR_OUT_OF_HOST_MEMORY.
 */
static VkResult
pvr_mark_storage_allocated(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc,
                           struct pvr_render_pass_attachment *attachment,
                           struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);

   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      /* Update the locations used in the pixel output registers. */
      pvr_mark_storage_allocated_in_buffer(&alloc->output_reg,
                                           resource->reg.output_reg,
                                           pixel_size);

      /* Update the range of pixel output registers used. */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
   } else {
      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      if (resource->mem.tile_buffer >= alloc->tile_buffers_count) {
         /* Grow the number of tile buffers. */
         struct pvr_renderpass_alloc_buffer *new_tile_buffers = vk_realloc(
            ctx->allocator,
            alloc->tile_buffers,
            sizeof(alloc->tile_buffers[0U]) * (resource->mem.tile_buffer + 1U),
            8U,
            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!new_tile_buffers)
            return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

         alloc->tile_buffers = new_tile_buffers;
         /* Zero-initialize only the newly added entries. */
         memset(
            &alloc->tile_buffers[alloc->tile_buffers_count],
            0U,
            sizeof(alloc->tile_buffers[0U]) *
               (resource->mem.tile_buffer + 1U - alloc->tile_buffers_count));
         alloc->tile_buffers_count = resource->mem.tile_buffer + 1U;
         assert(alloc->tile_buffers_count <= ctx->pass->max_tilebuffer_count);
      }

      /* Update the locations used in the tile buffer. */
      pvr_mark_storage_allocated_in_buffer(
         &alloc->tile_buffers[resource->mem.tile_buffer],
         resource->mem.offset_dw,
         pixel_size);

      /* The hardware makes the bit depth of the on-chip storage and memory
       * storage the same so make sure the memory storage is large enough to
       * accommodate the largest render target.
       */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
   }

   return VK_SUCCESS;
}
696 
/**
 * Allocate on-chip color storage for an attachment, preferring pixel output
 * registers and falling back to tile buffers.
 *
 * On success the chosen location is written to resource and recorded in
 * alloc. Fails with VK_ERROR_TOO_MANY_OBJECTS when the tile buffer limit is
 * reached, or VK_ERROR_OUT_OF_HOST_MEMORY from growing the tracking array.
 */
static VkResult
pvr_surface_alloc_color_storage(const struct pvr_device_info *dev_info,
                                struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *alloc,
                                struct pvr_render_pass_attachment *attachment,
                                struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);

   /* Try allocating pixel output registers. */
   const int32_t output_reg =
      pvr_is_space_in_buffer(dev_info, &alloc->output_reg, pixel_size);
   if (output_reg != -1) {
      resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
      resource->reg.output_reg = (uint32_t)output_reg;
      resource->reg.offset = 0U;
   } else {
      uint32_t i;

      /* Mark the attachment as using a tile buffer. */
      resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;

      /* Try allocating from an existing tile buffer. */
      for (i = 0U; i < alloc->tile_buffers_count; i++) {
         const int32_t tile_buffer_offset =
            pvr_is_space_in_buffer(dev_info,
                                   &alloc->tile_buffers[i],
                                   pixel_size);

         if (tile_buffer_offset != -1) {
            resource->mem.tile_buffer = i;
            resource->mem.offset_dw = (uint32_t)tile_buffer_offset;
            break;
         }
      }

      /* i == count means the scan found no space in any existing buffer. */
      if (i == alloc->tile_buffers_count) {
         /* Check for reaching the maximum number of tile buffers. */
         if (alloc->tile_buffers_count == ctx->pass->max_tilebuffer_count)
            return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);

         /* Use a newly allocated tile buffer; pvr_mark_storage_allocated
          * grows the tracking array for the new index.
          */
         resource->mem.tile_buffer = i;
         resource->mem.offset_dw = 0U;
      }
   }

   /* Update which parts of the pixel outputs/tile buffers are used. */
   return pvr_mark_storage_allocated(ctx, alloc, attachment, resource);
}
749 
750 /** Free the storage allocated to an attachment. */
751 static void
pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer * buffer,struct pvr_render_int_attachment * int_attach,uint32_t start)752 pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer *buffer,
753                         struct pvr_render_int_attachment *int_attach,
754                         uint32_t start)
755 {
756    const uint32_t pixel_size = DIV_ROUND_UP(
757       pvr_get_accum_format_bitsize(int_attach->attachment->vk_format),
758       32U);
759 
760    BITSET_CLEAR_RANGE(buffer->allocs, start, start + pixel_size - 1U);
761 }
762 
763 /** Free the storage allocated to an attachment. */
764 static void
pvr_free_surface_storage(struct pvr_renderpass_context * ctx,struct pvr_render_int_attachment * int_attach)765 pvr_free_surface_storage(struct pvr_renderpass_context *ctx,
766                          struct pvr_render_int_attachment *int_attach)
767 {
768    struct usc_mrt_resource *resource = &int_attach->resource;
769    struct pvr_renderpass_alloc *alloc = &ctx->alloc;
770 
771    assert(resource->type != USC_MRT_RESOURCE_TYPE_INVALID);
772 
773    /* Mark the storage as free. */
774    if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
775       pvr_free_buffer_storage(&alloc->output_reg,
776                               int_attach,
777                               resource->reg.output_reg);
778    } else {
779       struct pvr_renderpass_alloc_buffer *tile_buffer;
780 
781       assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);
782 
783       assert(resource->mem.tile_buffer < alloc->tile_buffers_count);
784       tile_buffer = &alloc->tile_buffers[resource->mem.tile_buffer];
785       pvr_free_buffer_storage(tile_buffer, int_attach, resource->mem.offset_dw);
786    }
787 
788    /* Mark that the attachment doesn't have allocated storage. */
789    resource->type = USC_MRT_RESOURCE_TYPE_INVALID;
790 
791    /* Remove from the list of surfaces with allocated on-chip storage. */
792    assert(ctx->active_surfaces > 0U);
793    ctx->active_surfaces--;
794    list_del(&int_attach->link);
795 }
796 
pvr_reset_surface(struct pvr_renderpass_context * ctx,struct pvr_render_int_attachment * int_attach)797 static void pvr_reset_surface(struct pvr_renderpass_context *ctx,
798                               struct pvr_render_int_attachment *int_attach)
799 {
800    /* Reset information about the range of uses. */
801    int_attach->first_use = int_attach->last_use = -1;
802    int_attach->z_replicate = false;
803 
804    pvr_free_surface_storage(ctx, int_attach);
805 }
806 
807 static void
pvr_make_surface_active(struct pvr_renderpass_context * ctx,struct pvr_render_int_attachment * int_attach,uint32_t subpass_num)808 pvr_make_surface_active(struct pvr_renderpass_context *ctx,
809                         struct pvr_render_int_attachment *int_attach,
810                         uint32_t subpass_num)
811 {
812    /* Add to the list of surfaces with on-chip storage. */
813    assert(int_attach->first_use == -1);
814    int_attach->first_use = subpass_num;
815    ctx->active_surfaces++;
816    list_addtail(&int_attach->link, &ctx->active_surf_list);
817 }
818 
/**
 * For a subpass copy details of storage locations for the input/color to the
 * output structure.
 *
 * Fills hw_subpass->setup.mrt_resources with the on-chip resource of each
 * color attachment (indexed as the subpass's on-chip render targets), then
 * fills hw_subpass->input_access describing how each input attachment is
 * read: from on-chip storage (optionally via Z replicate) or from memory.
 * Input attachments with on-chip storage that are not also color attachments
 * are appended as extra render targets.
 *
 * Returns VK_SUCCESS, or VK_ERROR_OUT_OF_HOST_MEMORY with any allocations
 * made here freed.
 */
static VkResult
pvr_copy_storage_details(struct pvr_renderpass_context *ctx,
                         struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                         struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   /* Upper bound on render targets: every color and every input attachment
    * could need its own on-chip slot.
    */
   const uint32_t max_rts =
      input_subpass->color_count + input_subpass->input_count;
   VkResult result;

   if (max_rts == 0)
      return VK_SUCCESS;

   hw_subpass->setup.mrt_resources =
      vk_zalloc(ctx->allocator,
                sizeof(hw_subpass->setup.mrt_resources[0U]) * max_rts,
                8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->setup.mrt_resources) {
      /* NOTE(review): the cleanup label tests hw_subpass->input_access, which
       * this function has not yet assigned on this path — presumably
       * hw_subpass is zero-initialized by the caller; verify.
       */
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      /* Record for the subpass where the color attachment is stored. */
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      hw_subpass->setup.mrt_resources[i] = int_attach->resource;
   }

   /* On-chip render target i corresponds to color attachment i; extra input
    * attachments may be appended below.
    */
   hw_subpass->setup.num_render_targets = input_subpass->color_count;

   if (input_subpass->input_count == 0)
      return VK_SUCCESS;

   /* For this subpass's input attachments. */
   hw_subpass->input_access = vk_alloc(ctx->allocator,
                                       sizeof(hw_subpass->input_access[0U]) *
                                          input_subpass->input_count,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->input_access) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID) {
         bool is_color = false;

         /* Access the input attachment from on-chip storage. */
         if (int_attach->z_replicate) {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE;
         } else {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP;
         }

         /* If this attachment is also a color attachment then point to the
          * color attachment's resource.
          */
         for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
            if (input_subpass->color_attachments[j] == (int32_t)attach_idx) {
               hw_subpass->input_access[i].on_chip_rt = j;
               is_color = true;
               break;
            }
         }

         if (!is_color) {
            /* Input-only attachment: append its storage as a new on-chip
             * render target slot.
             */
            const uint32_t num_rts = hw_subpass->setup.num_render_targets;

            hw_subpass->input_access[i].on_chip_rt = num_rts;
            hw_subpass->setup.num_render_targets++;

            /* Record the location of the storage for the attachment. */
            hw_subpass->setup.mrt_resources[num_rts] = int_attach->resource;
         }
      } else {
         /* Access the input attachment from memory. */
         hw_subpass->input_access[i].type =
            PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
         hw_subpass->input_access[i].on_chip_rt = -1;
      }
   }

   return VK_SUCCESS;

end_copy_storage_details:
   /* Free anything allocated above so the caller sees a clean hw_subpass. */
   if (hw_subpass->input_access) {
      vk_free(ctx->allocator, hw_subpass->input_access);
      hw_subpass->input_access = NULL;
   }

   if (hw_subpass->setup.mrt_resources) {
      vk_free(ctx->allocator, hw_subpass->setup.mrt_resources);
      hw_subpass->setup.mrt_resources = NULL;
   }

   return result;
}
940 
/**
 * For a subpass copy details of any storage location for a replicated version
 * of the depth attachment to the output structure.
 *
 * If the depth/stencil attachment is also one of the subpass's input
 * attachments, the replicated depth reuses that input's on-chip render target
 * slot; otherwise a new slot is appended to setup.mrt_resources (growing the
 * array by one). The chosen slot index is recorded in hw_subpass->z_replicate.
 *
 * Returns VK_SUCCESS or VK_ERROR_OUT_OF_HOST_MEMORY if growing the array
 * fails.
 */
static VkResult
pvr_copy_z_replicate_details(struct pvr_renderpass_context *ctx,
                             struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                             struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   /* Render target slot that will hold the replicated depth; only assigned
    * via the loop below or the !found fallback.
    */
   uint32_t z_replicate;
   bool found = false;

   assert(input_subpass->depth_stencil_attachment >= 0U &&
          input_subpass->depth_stencil_attachment <
             (int32_t)ctx->pass->attachment_count);

   int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];

   /* Z replicate must not have been set up for this subpass yet. */
   assert(hw_subpass->z_replicate == -1);

   /* Is the replicated depth also an input attachment? */
   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach == int_ds_attach) {
         /* Reuse the input attachment's on-chip render target slot. */
         z_replicate = hw_subpass->input_access[i].on_chip_rt;
         found = true;
         break;
      }
   }

   if (!found)
      z_replicate = hw_subpass->setup.num_render_targets;

   /* If the Z replicate attachment isn't also an input attachment then grow the
    * array of locations.
    */
   assert(z_replicate <= hw_subpass->setup.num_render_targets);
   if (z_replicate == hw_subpass->setup.num_render_targets) {
      struct usc_mrt_resource *mrt =
         vk_realloc(ctx->allocator,
                    hw_subpass->setup.mrt_resources,
                    sizeof(hw_subpass->setup.mrt_resources[0U]) *
                       (hw_subpass->setup.num_render_targets + 1U),
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_subpass->setup.mrt_resources = mrt;
      hw_subpass->setup.num_render_targets++;
   }

   /* Copy the location of the Z replicate. */
   assert(int_ds_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
   hw_subpass->setup.mrt_resources[z_replicate] = int_ds_attach->resource;
   hw_subpass->z_replicate = z_replicate;

   return VK_SUCCESS;
}
1009 
pvr_dereference_surface(struct pvr_renderpass_context * ctx,int32_t attach_idx,uint32_t subpass_num)1010 static void pvr_dereference_surface(struct pvr_renderpass_context *ctx,
1011                                     int32_t attach_idx,
1012                                     uint32_t subpass_num)
1013 {
1014    struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
1015 
1016    assert(int_attach->remaining_count > 0U);
1017    int_attach->remaining_count--;
1018 
1019    if (int_attach->remaining_count == 0U) {
1020       if (int_attach->first_use != -1)
1021          int_attach->last_use = subpass_num;
1022 
1023       if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID)
1024          pvr_free_surface_storage(ctx, int_attach);
1025    }
1026 
1027    if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1028       assert(int_attach->stencil_remaining_count > 0U);
1029       int_attach->stencil_remaining_count--;
1030    }
1031 }
1032 
pvr_free_render(struct pvr_renderpass_context * ctx)1033 static void pvr_free_render(struct pvr_renderpass_context *ctx)
1034 {
1035    pvr_free_alloc(ctx, &ctx->alloc);
1036 
1037    if (ctx->subpasses) {
1038       for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++)
1039          pvr_free_alloc(ctx, &ctx->subpasses[i].alloc);
1040 
1041       vk_free(ctx->allocator, ctx->subpasses);
1042       ctx->subpasses = NULL;
1043    }
1044 }
1045 
pvr_render_has_side_effects(struct pvr_renderpass_context * ctx)1046 static bool pvr_render_has_side_effects(struct pvr_renderpass_context *ctx)
1047 {
1048    struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
1049    struct pvr_render_pass *pass = ctx->pass;
1050 
1051    if ((hw_render->depth_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
1052         hw_render->depth_store) ||
1053        (hw_render->stencil_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
1054         hw_render->stencil_store)) {
1055       return true;
1056    }
1057 
1058    for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
1059       const struct pvr_renderpass_hwsetup_eot_surface *eot_attach =
1060          &hw_render->eot_surfaces[i];
1061       const struct pvr_render_pass_attachment *attachment =
1062          &pass->attachments[eot_attach->attachment_idx];
1063 
1064       if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
1065           attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) {
1066          return true;
1067       }
1068 
1069       if (eot_attach->need_resolve)
1070          return true;
1071    }
1072 
1073    return false;
1074 }
1075 
/**
 * Close the current hardware render.
 *
 * Finalizes depth/stencil store decisions, assigns end-of-tile (EOT) MRT
 * slots for every surface still holding on-chip storage, builds the list of
 * EOT surfaces (stores and resolves) in two passes (count, then fill),
 * classifies each resolve as PBE or transfer within the PBE emit budget,
 * and finally resets the context for the next render.
 *
 * Returns VK_SUCCESS (also when no render is open) or an allocation error.
 */
static VkResult pvr_close_render(const struct pvr_device *device,
                                 struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_hwsetup_eot_surface *eot_attach;
   struct usc_mrt_setup *eot_setup;
   int32_t mrt_idx;
   VkResult result;

   /* Render already closed. */
   if (!hw_render)
      return VK_SUCCESS;

   /* Setup render and allocate resources for color/depth loads and clears. */
   result = pvr_subpass_setup_render_init(ctx);
   if (result != VK_SUCCESS)
      return result;

   /* Reset surfaces whose last use was in the current render. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->last_use != -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_reset_surface(ctx, int_attach);
      }
   }

   /* Check if the depth attachment has uses in future subpasses. */
   if (ctx->int_ds_attach) {
      /* Store the depth to the attachment at the end of the render. */
      if (ctx->int_ds_attach->remaining_count > 0U)
         hw_render->depth_store = true;

      /* Store the stencil to the attachment at the end of the render. */
      if (ctx->int_ds_attach->stencil_remaining_count > 0U)
         hw_render->stencil_store = true;

      if (hw_render->depth_store || hw_render->stencil_store) {
         /* The render's D/S attachment must be unset or already this one. */
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx ==
                   ctx->int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = ctx->int_ds_attach->attachment->index;

         /* Allocate memory for the attachment. */
         pvr_mark_surface_alloc(ctx, ctx->int_ds_attach);
      }

      /* Load the depth and stencil before the next use. */
      ctx->int_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      ctx->int_ds_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   eot_setup = &hw_render->eot_setup;
   memset(eot_setup, 0U, sizeof(*eot_setup));

   /* Set the number of pixel output registers/tile buffers allocated for the
    * render and copy the information to all subpasses and the EOT program.
    */
   pvr_finalise_po_alloc(device, ctx);

   /* If any attachment are used with z replicate then they will be stored to by
    * the ISP. So remove them from the list to store to using the PBE.
    */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->z_replicate)
         pvr_reset_surface(ctx, int_attach);
   }

   /* Number of surfaces with allocated on-chip storage. */
   eot_setup->num_render_targets = ctx->active_surfaces;
   eot_setup->mrt_resources = vk_alloc(ctx->allocator,
                                       sizeof(eot_setup->mrt_resources[0U]) *
                                          eot_setup->num_render_targets,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!eot_setup->mrt_resources)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Record the location of the on-chip storage. */
   mrt_idx = 0U;
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      assert(int_attach->remaining_count > 0U);
      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         assert(int_attach->stencil_remaining_count > 0U);

      /* Copy the location of the source data for this attachment. */
      eot_setup->mrt_resources[mrt_idx] = int_attach->resource;

      /* Give the attachment its EOT MRT slot for use below. */
      assert(int_attach->mrt_idx == -1);
      int_attach->mrt_idx = mrt_idx;

      mrt_idx++;
   }
   assert(mrt_idx == (int32_t)eot_setup->num_render_targets);

   hw_render->eot_surface_count = 0U;
   hw_render->pbe_emits = 0U;

   /* Count the number of surfaces to store to at the end of the subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         /* Only surfaces still on the active list have on-chip data. */
         if (list_is_linked(&color_attach->link)) {
            uint32_t rem_count = resolve_output == VK_ATTACHMENT_UNUSED ? 0U
                                                                        : 1U;

            /* If a color attachment is resolved it will have an extra
             * remaining usage.
             */
            if (color_attach->remaining_count > rem_count &&
                !color_attach->eot_surf_required) {
               color_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *int_resolve_attach =
               &ctx->int_attach[resolve_output];

            if (!int_resolve_attach->eot_surf_required) {
               int_resolve_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }
      }
   }

   assert(hw_render->eot_surface_count <= 16U);

   hw_render->eot_surfaces = vk_alloc(ctx->allocator,
                                      sizeof(hw_render->eot_surfaces[0U]) *
                                         hw_render->eot_surface_count,
                                      8,
                                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_render->eot_surfaces)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Second pass: fill in the EOT surface array counted above. */
   eot_attach = hw_render->eot_surfaces;

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *resolve_src =
               &ctx->int_attach[input_subpass->color_attachments[j]];
            struct pvr_render_int_attachment *resolve_dst =
               &ctx->int_attach[resolve_output];

            assert(resolve_dst->eot_surf_required);
            resolve_dst->eot_surf_required = false;

            /* Dereference the source to the resolve. */
            assert(resolve_src->remaining_count > 0U);
            resolve_src->remaining_count--;

            /* Allocate device memory for the resolve destination. */
            pvr_mark_surface_alloc(ctx, resolve_dst);

            /* The attachment has been written so load the attachment the
             * next time it is referenced.
             */
            resolve_dst->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;

            eot_attach->mrt_idx = resolve_src->mrt_idx;
            eot_attach->attachment_idx = resolve_dst->attachment->index;
            eot_attach->src_attachment_idx = resolve_src->attachment->index;

            eot_attach->need_resolve = true;

            if (!resolve_src->is_pbe_downscalable) {
               /* Resolve src must be stored for transfer resolve. */
               assert(resolve_src->remaining_count > 0U);

               eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
            } else if (resolve_src->remaining_count == 0U) {
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
               hw_render->pbe_emits++;
            } else {
               /* Decided later: PBE if emit registers remain, else transfer. */
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_INVALID;
            }

            eot_attach++;
         }

         if (color_attach->eot_surf_required) {
            assert(color_attach->remaining_count > 0U);

            pvr_mark_surface_alloc(ctx, color_attach);

            assert(color_attach->mrt_idx >= 0);
            assert(color_attach->mrt_idx <
                   (int32_t)hw_render->eot_setup.num_render_targets);

            eot_attach->mrt_idx = color_attach->mrt_idx;
            eot_attach->attachment_idx = color_attach->attachment->index;
            eot_attach->need_resolve = false;
            eot_attach++;

            hw_render->pbe_emits++;

            color_attach->eot_surf_required = false;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Count the number of extra resolves we can do through the PBE. */
   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      eot_attach = &hw_render->eot_surfaces[i];

      if (eot_attach->need_resolve &&
          eot_attach->resolve_type == PVR_RESOLVE_TYPE_INVALID) {
         if (hw_render->pbe_emits == PVR_NUM_PBE_EMIT_REGS) {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
         } else {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
            hw_render->pbe_emits++;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Check for side effects in the final render. */
   hw_render->has_side_effects = pvr_render_has_side_effects(ctx);

   /* Reset active surfaces. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      int_attach->mrt_idx = -1;
      pvr_reset_surface(ctx, int_attach);
   }

   assert(ctx->active_surfaces == 0U);
   assert(list_is_empty(&ctx->active_surf_list));

   pvr_free_render(ctx);
   pvr_reset_render(ctx);

   return VK_SUCCESS;
}
1356 
pvr_is_input(struct pvr_render_subpass * subpass,uint32_t attach_idx)1357 static bool pvr_is_input(struct pvr_render_subpass *subpass,
1358                          uint32_t attach_idx)
1359 {
1360    if (attach_idx == VK_ATTACHMENT_UNUSED)
1361       return false;
1362 
1363    for (uint32_t i = 0U; i < subpass->input_count; i++) {
1364       if (subpass->input_attachments[i] == attach_idx)
1365          return true;
1366    }
1367 
1368    return false;
1369 }
1370 
1371 static bool
pvr_depth_zls_conflict(struct pvr_renderpass_context * ctx,struct pvr_render_int_attachment * int_ds_attach,bool existing_ds_is_input)1372 pvr_depth_zls_conflict(struct pvr_renderpass_context *ctx,
1373                        struct pvr_render_int_attachment *int_ds_attach,
1374                        bool existing_ds_is_input)
1375 {
1376    if (!ctx->int_ds_attach)
1377       return false;
1378 
1379    /* No conflict if the incoming subpass doesn't have a depth/stencil
1380     * attachment.
1381     */
1382    if (!int_ds_attach)
1383       return false;
1384 
1385    /* No conflict if the incoming depth/stencil attachment is the same as the
1386     * existing one.
1387     */
1388    if (ctx->int_ds_attach == int_ds_attach)
1389       return false;
1390 
1391    /* If the existing depth/stencil attachment is used later, then we can't
1392     * overwrite it.
1393     *
1394     * The exception is if the only use is as an input attachment in the incoming
1395     * subpass in which case we can use the Z replicate feature to save the
1396     * value.
1397     */
1398    if (ctx->int_ds_attach->remaining_count > 0U &&
1399        !(existing_ds_is_input && ctx->int_ds_attach->remaining_count == 1U)) {
1400       return true;
1401    }
1402 
1403    if (ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
1404        ctx->int_ds_attach->stencil_remaining_count > 0U) {
1405       return true;
1406    }
1407 
1408    /* We can't load midrender so fail if the new depth/stencil attachment is
1409     * already initialized.
1410     */
1411    if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
1412       return true;
1413 
1414    if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
1415        int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
1416       return true;
1417    }
1418 
1419    return false;
1420 }
1421 
1422 static void
pvr_set_surface_resource(struct pvr_render_int_attachment * int_attach,struct pvr_renderpass_resource * resource)1423 pvr_set_surface_resource(struct pvr_render_int_attachment *int_attach,
1424                          struct pvr_renderpass_resource *resource)
1425 {
1426    int_attach->resource.type = resource->type;
1427 
1428    switch (resource->type) {
1429    case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
1430       int_attach->resource.reg.output_reg = resource->reg.output_reg;
1431       int_attach->resource.reg.offset = resource->reg.offset;
1432       break;
1433 
1434    case USC_MRT_RESOURCE_TYPE_MEMORY:
1435       int_attach->resource.mem.tile_buffer = resource->mem.tile_buffer;
1436       int_attach->resource.mem.offset_dw = resource->mem.offset_dw;
1437       break;
1438 
1439    default:
1440       break;
1441    }
1442 }
1443 
pvr_equal_resources(struct pvr_renderpass_resource * resource1,struct pvr_renderpass_resource * resource2)1444 static bool pvr_equal_resources(struct pvr_renderpass_resource *resource1,
1445                                 struct pvr_renderpass_resource *resource2)
1446 {
1447    if (resource1->type != resource2->type)
1448       return false;
1449 
1450    switch (resource1->type) {
1451    case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
1452       return resource1->reg.output_reg == resource2->reg.output_reg &&
1453              resource1->reg.offset == resource2->reg.offset;
1454 
1455    case USC_MRT_RESOURCE_TYPE_MEMORY:
1456       return resource1->mem.tile_buffer == resource2->mem.tile_buffer &&
1457              resource1->mem.offset_dw == resource2->mem.offset_dw;
1458 
1459    default:
1460       return true;
1461    }
1462 }
1463 
1464 static VkResult
pvr_enable_z_replicate(struct pvr_renderpass_context * ctx,struct pvr_renderpass_hwsetup_render * hw_render,int32_t replicate_attach_idx,struct pvr_renderpass_resource * replicate_dst)1465 pvr_enable_z_replicate(struct pvr_renderpass_context *ctx,
1466                        struct pvr_renderpass_hwsetup_render *hw_render,
1467                        int32_t replicate_attach_idx,
1468                        struct pvr_renderpass_resource *replicate_dst)
1469 {
1470    struct pvr_render_int_attachment *int_attach =
1471       &ctx->int_attach[replicate_attach_idx];
1472    int32_t first_use = -1;
1473 
1474    /* If Z replication was already enabled for the attachment then nothing more
1475     * to do.
1476     */
1477    if (!int_attach->z_replicate) {
1478       /* Copy details of the storage for the replicated value to the attachment.
1479        */
1480       assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
1481       assert(replicate_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
1482       pvr_set_surface_resource(int_attach, replicate_dst);
1483    } else {
1484       assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
1485       assert(replicate_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
1486    }
1487 
1488    /* Find the first subpass where the attachment is written. */
1489    for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
1490       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1491       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1492 
1493       if (input_subpass->depth_stencil_attachment == replicate_attach_idx) {
1494          first_use = i;
1495          break;
1496       }
1497    }
1498    assert(first_use >= 0);
1499 
1500    /* For all subpasses from the first write. */
1501    for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
1502       struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1503       struct pvr_render_subpass *input_subpass = subpass->input_subpass;
1504 
1505       /* If the subpass writes to the attachment then enable z replication. */
1506       if (input_subpass->depth_stencil_attachment == replicate_attach_idx &&
1507           !subpass->z_replicate) {
1508          subpass->z_replicate = true;
1509 
1510          if (i != (hw_render->subpass_count - 1U)) {
1511             /* Copy the details of the storage for replicated value. */
1512             const VkResult result =
1513                pvr_copy_z_replicate_details(ctx,
1514                                             &ctx->hw_render->subpasses[i],
1515                                             subpass);
1516             if (result != VK_SUCCESS)
1517                return result;
1518          }
1519       }
1520    }
1521 
1522    if (!int_attach->z_replicate) {
1523       /* Add the storage for the replicated value to locations in use at each
1524        * subpass.
1525        */
1526       for (uint32_t i = first_use; i < (hw_render->subpass_count - 1U); i++) {
1527          struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
1528 
1529          pvr_mark_storage_allocated(ctx,
1530                                     &subpass->alloc,
1531                                     int_attach->attachment,
1532                                     replicate_dst);
1533       }
1534 
1535       /* Add the depth attachment to the list of surfaces with allocated
1536        * storage.
1537        */
1538       pvr_make_surface_active(ctx, int_attach, first_use);
1539 
1540       int_attach->z_replicate = true;
1541    }
1542 
1543    return VK_SUCCESS;
1544 }
1545 
pvr_is_pending_resolve_dest(struct pvr_renderpass_context * ctx,uint32_t attach_idx)1546 static bool pvr_is_pending_resolve_dest(struct pvr_renderpass_context *ctx,
1547                                         uint32_t attach_idx)
1548 {
1549    struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
1550 
1551    return int_attach->last_resolve_dst_render != -1 &&
1552           int_attach->last_resolve_dst_render ==
1553              (int32_t)(ctx->hw_setup->render_count - 1U);
1554 }
1555 
pvr_is_pending_resolve_src(struct pvr_renderpass_context * ctx,uint32_t attach_idx)1556 static bool pvr_is_pending_resolve_src(struct pvr_renderpass_context *ctx,
1557                                        uint32_t attach_idx)
1558 {
1559    struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];
1560 
1561    return int_attach->last_resolve_src_render != -1 &&
1562           int_attach->last_resolve_src_render ==
1563              (int32_t)(ctx->hw_setup->render_count - 1U);
1564 }
1565 
pvr_exceeds_pbe_registers(struct pvr_renderpass_context * ctx,struct pvr_render_subpass * subpass)1566 static bool pvr_exceeds_pbe_registers(struct pvr_renderpass_context *ctx,
1567                                       struct pvr_render_subpass *subpass)
1568 {
1569    int32_t live_outputs[PVR_NUM_PBE_EMIT_REGS];
1570    uint32_t num_live_outputs = 0U;
1571 
1572    /* Count all color outputs so far. */
1573    for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++) {
1574       struct pvr_render_subpass *input_subpass =
1575          ctx->subpasses[i].input_subpass;
1576 
1577       for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
1578          const uint32_t global_color_attach =
1579             input_subpass->color_attachments[j];
1580          struct pvr_render_int_attachment *int_attach;
1581          bool found = false;
1582 
1583          if (global_color_attach == VK_ATTACHMENT_UNUSED)
1584             continue;
1585 
1586          int_attach = &ctx->int_attach[global_color_attach];
1587 
1588          if (int_attach->last_read <= (int32_t)subpass->index)
1589             continue;
1590 
1591          for (uint32_t k = 0U; k < num_live_outputs; k++) {
1592             if (live_outputs[k] == global_color_attach) {
1593                found = true;
1594                break;
1595             }
1596          }
1597 
1598          if (!found)
1599             live_outputs[num_live_outputs++] = global_color_attach;
1600       }
1601    }
1602 
1603    assert(num_live_outputs <= PVR_NUM_PBE_EMIT_REGS);
1604 
1605    /* Check if adding all the color outputs of the new subpass to the render
1606     * would exceed the limit.
1607     */
1608    for (uint32_t i = 0U; i < subpass->color_count; i++) {
1609       const uint32_t global_color_attach = subpass->color_attachments[i];
1610       struct pvr_render_int_attachment *int_attach;
1611       bool found = false;
1612 
1613       if (global_color_attach == VK_ATTACHMENT_UNUSED)
1614          continue;
1615 
1616       int_attach = &ctx->int_attach[global_color_attach];
1617 
1618       if (int_attach->last_read <= (int32_t)subpass->index)
1619          continue;
1620 
1621       for (uint32_t j = 0U; j < num_live_outputs; j++) {
1622          if (live_outputs[j] == global_color_attach) {
1623             found = true;
1624             break;
1625          }
1626       }
1627 
1628       if (!found) {
1629          if (num_live_outputs >= PVR_NUM_PBE_EMIT_REGS)
1630             return true;
1631 
1632          live_outputs[num_live_outputs++] = global_color_attach;
1633       }
1634    }
1635 
1636    return false;
1637 }
1638 
pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer * dst,struct pvr_renderpass_alloc_buffer * src)1639 static void pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer *dst,
1640                                    struct pvr_renderpass_alloc_buffer *src)
1641 {
1642    for (uint32_t i = 0U; i < ARRAY_SIZE(dst->allocs); i++)
1643       dst->allocs[i] |= src->allocs[i];
1644 }
1645 
pvr_merge_alloc(struct pvr_renderpass_context * ctx,struct pvr_renderpass_alloc * dst,struct pvr_renderpass_alloc * src)1646 static VkResult pvr_merge_alloc(struct pvr_renderpass_context *ctx,
1647                                 struct pvr_renderpass_alloc *dst,
1648                                 struct pvr_renderpass_alloc *src)
1649 {
1650    pvr_merge_alloc_buffer(&dst->output_reg, &src->output_reg);
1651 
1652    dst->output_regs_count =
1653       MAX2(dst->output_regs_count, src->output_regs_count);
1654 
1655    if (dst->tile_buffers_count < src->tile_buffers_count) {
1656       struct pvr_renderpass_alloc_buffer *new_tile_buffers =
1657          vk_realloc(ctx->allocator,
1658                     dst->tile_buffers,
1659                     sizeof(dst->tile_buffers[0U]) * src->tile_buffers_count,
1660                     8U,
1661                     VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1662       if (!new_tile_buffers)
1663          return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
1664 
1665       dst->tile_buffers = new_tile_buffers;
1666       memset(dst->tile_buffers + dst->tile_buffers_count,
1667              0U,
1668              sizeof(dst->tile_buffers[0U]) *
1669                 (src->tile_buffers_count - dst->tile_buffers_count));
1670       dst->tile_buffers_count = src->tile_buffers_count;
1671    }
1672 
1673    for (uint32_t i = 0U; i < src->tile_buffers_count; i++)
1674       pvr_merge_alloc_buffer(&dst->tile_buffers[i], &src->tile_buffers[i]);
1675 
1676    return VK_SUCCESS;
1677 }
1678 
/* Check whether on-chip color storage can be found for a replicated depth
 * value for attachment 'attach_idx', and if so reserve it.
 *
 * On success the chosen location is written to 'resource' and marked
 * allocated in 'alloc'. If the attachment already has z replication enabled
 * (and therefore storage), returns VK_SUCCESS without touching 'alloc' or
 * 'resource'.
 *
 * The candidate location must be free in every subpass from the first write
 * of the depth onward, so the allocation states of those subpasses are
 * merged into a temporary copy of 'alloc' before searching.
 */
static VkResult
pvr_is_z_replicate_space_available(const struct pvr_device_info *dev_info,
                                   struct pvr_renderpass_context *ctx,
                                   struct pvr_renderpass_alloc *alloc,
                                   uint32_t attach_idx,
                                   struct pvr_renderpass_resource *resource)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_int_attachment *int_attach;
   struct pvr_renderpass_alloc combined_alloc;
   uint32_t first_use;
   VkResult result;

   /* If z replication was already enabled by a previous subpass then storage
    * will already be allocated.
    */
   assert(attach_idx < ctx->pass->attachment_count);

   int_attach = &ctx->int_attach[attach_idx];
   if (int_attach->z_replicate) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      return VK_SUCCESS;
   }

   /* Get the registers used in any subpass after the depth is first written.
    * Start with registers used in the incoming subpass.
    */
   result = pvr_copy_alloc(ctx, &combined_alloc, alloc);
   if (result != VK_SUCCESS)
      return result;

   if (hw_render) {
      /* Find the subpass where the depth is first written. */
      first_use = hw_render->subpass_count;
      for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
         struct pvr_render_subpass *input_subpass = subpass->input_subpass;

         if (input_subpass->depth_stencil_attachment == (int32_t)attach_idx) {
            first_use = i;
            break;
         }
      }

      /* Merge in registers used in previous subpasses. */
      for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];

         result = pvr_merge_alloc(ctx, &combined_alloc, &subpass->alloc);
         if (result != VK_SUCCESS) {
            /* Free the temporary merged state on error. */
            pvr_free_alloc(ctx, &combined_alloc);
            return result;
         }
      }
   }

   /* Search the merged allocation state for a free location; the result is
    * written to 'resource'.
    */
   result = pvr_surface_alloc_color_storage(dev_info,
                                            ctx,
                                            &combined_alloc,
                                            int_attach->attachment,
                                            resource);

   /* The merged state was only needed for the search. */
   pvr_free_alloc(ctx, &combined_alloc);
   if (result != VK_SUCCESS)
      return result;

   /* Reserve the found location in the caller's allocation state. */
   return pvr_mark_storage_allocated(ctx,
                                     alloc,
                                     int_attach->attachment,
                                     resource);
}
1750 
/* Check whether on-chip storage is available for all of 'subpass's color
 * outputs and any required depth replication, and reserve it.
 *
 * On VK_SUCCESS:
 *  - 'alloc' holds a copy of the context's allocation state updated with the
 *    new reservations (caller owns alloc->tile_buffers);
 *  - sp_dsts->color[i] holds the location allocated for color attachment i,
 *    or USC_MRT_RESOURCE_TYPE_INVALID if it already had storage (caller owns
 *    sp_dsts->color);
 *  - sp_dsts->existing_zrep/incoming_zrep hold locations for replicated
 *    depth where the corresponding sp_depth flag is set.
 * On failure everything allocated here is freed and sp_dsts->color is NULL.
 *
 * Returns VK_ERROR_TOO_MANY_OBJECTS if merging would force tile buffer
 * usage in an existing render (the caller treats this as "don't merge").
 */
static VkResult
pvr_is_subpass_space_available(const struct pvr_device_info *dev_info,
                               struct pvr_renderpass_context *ctx,
                               struct pvr_render_subpass *subpass,
                               struct pvr_render_subpass_depth_params *sp_depth,
                               struct pvr_renderpass_alloc *alloc,
                               struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;

   /* Mark pointers in return structures as not allocated. */
   sp_dsts->color = NULL;
   alloc->tile_buffers = NULL;

   /* Allocate space for which locations are in use after this subpass. */
   result = pvr_copy_alloc(ctx, alloc, &ctx->alloc);
   if (result != VK_SUCCESS)
      return result;

   /* Allocate space to store our results. */
   if (subpass->color_count > 0U) {
      sp_dsts->color =
         vk_alloc(ctx->allocator,
                  sizeof(sp_dsts->color[0U]) * subpass->color_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!sp_dsts->color) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_free_alloc;
      }
   } else {
      sp_dsts->color = NULL;
   }

   sp_dsts->existing_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
   sp_dsts->incoming_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t attach_idx = subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      assert(pvr_get_accum_format_bitsize(int_attach->attachment->vk_format) >
             0U);

      /* Is the attachment not allocated on-chip storage? */
      if (int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID) {
         result = pvr_surface_alloc_color_storage(dev_info,
                                                  ctx,
                                                  alloc,
                                                  int_attach->attachment,
                                                  &sp_dsts->color[i]);
         if (result != VK_SUCCESS)
            goto err_free_alloc;

         /* Avoid merging subpasses which result in tile buffers having to be
          * used. The benefit of merging must be weighed against the cost of
          * writing/reading to tile buffers.
          */
         if (ctx->hw_render &&
             sp_dsts->color[i].type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
            result = vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
            goto err_free_alloc;
         }
      } else {
         /* Already placed; INVALID here means "no new destination". */
         sp_dsts->color[i].type = USC_MRT_RESOURCE_TYPE_INVALID;
      }
   }

   /* Reserve storage for replicating the render's existing depth if the
    * incoming subpass reads it as an input attachment.
    */
   if (sp_depth->existing_ds_is_input) {
      result = pvr_is_z_replicate_space_available(dev_info,
                                                  ctx,
                                                  alloc,
                                                  sp_depth->existing_ds_attach,
                                                  &sp_dsts->existing_zrep);
      if (result != VK_SUCCESS)
         goto err_free_alloc;
   }

   /* Likewise for the incoming subpass's own depth attachment; reuse the
    * existing reservation when it is the same attachment.
    */
   if (sp_depth->incoming_ds_is_input) {
      if (sp_depth->existing_ds_attach != subpass->depth_stencil_attachment) {
         result = pvr_is_z_replicate_space_available(
            dev_info,
            ctx,
            alloc,
            subpass->depth_stencil_attachment,
            &sp_dsts->incoming_zrep);
         if (result != VK_SUCCESS)
            goto err_free_alloc;
      } else {
         sp_dsts->incoming_zrep = sp_dsts->existing_zrep;
      }
   }

   return VK_SUCCESS;

err_free_alloc:
   /* Release everything allocated by this function before returning. */
   pvr_free_alloc(ctx, alloc);
   if (sp_dsts->color)
      vk_free(ctx->allocator, sp_dsts->color);

   sp_dsts->color = NULL;

   return result;
}
1860 
/* Check whether 'subpass' can be merged into the render currently being
 * assembled in 'ctx'.
 *
 * Returns true if merging is possible; in that case 'new_alloc' and
 * 'sp_dsts' hold the storage allocated for the subpass's outputs and the
 * caller takes ownership of any memory they reference. Returns false (with
 * nothing left allocated) if the subpass must start a new render.
 */
static bool
pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
                            struct pvr_renderpass_context *ctx,
                            struct pvr_render_subpass *subpass,
                            struct pvr_render_subpass_depth_params *sp_depth,
                            struct pvr_render_int_attachment *int_ds_attach,
                            struct pvr_renderpass_alloc *new_alloc,
                            struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;
   bool ret;

   /* Mark pointers in return structures as not allocated. */
   sp_dsts->color = NULL;
   new_alloc->tile_buffers = NULL;

   /* The hardware doesn't support replicating the stencil, so we need to store
    * the depth to memory if a stencil attachment is used as an input
    * attachment.
    */
   if (sp_depth->existing_ds_is_input &&
       ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      return false;
   }

   /* Same restriction for the incoming subpass's own depth/stencil when it
    * would join an existing render.
    */
   if (sp_depth->incoming_ds_is_input && int_ds_attach &&
       int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       ctx->hw_render) {
      return false;
   }

   /* Can't mix multiple sample counts into same render. */
   if (ctx->hw_render &&
       ctx->hw_render->sample_count != subpass->sample_count) {
      return false;
   }

   /* If the depth is used by both the render and the incoming subpass and
    * either the existing depth must be saved or the new depth must be loaded
    * then we can't merge.
    */
   ret = pvr_depth_zls_conflict(ctx,
                                int_ds_attach,
                                sp_depth->existing_ds_is_input);
   if (ret)
      return false;

   /* Check if any of the subpass's dependencies are marked that the two
    * subpasses can't be in the same render.
    */
   for (uint32_t i = 0U; i < subpass->dep_count; i++) {
      const uint32_t dep = subpass->dep_list[i];
      if (subpass->flush_on_dep[i] && ctx->hw_setup->subpass_map[dep].render ==
                                         (ctx->hw_setup->render_count - 1U)) {
         return false;
      }
   }

   /* Check if one of the input/color attachments is written by an MSAA resolve
    * in an existing subpass in the current render.
    */
   for (uint32_t i = 0U; i < subpass->input_count; i++) {
      const uint32_t attach_idx = subpass->input_attachments[i];
      if (attach_idx != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, attach_idx)) {
         return false;
      }
   }

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED &&
          (pvr_is_pending_resolve_dest(ctx, subpass->color_attachments[i]) ||
           pvr_is_pending_resolve_src(ctx, subpass->color_attachments[i]))) {
         return false;
      }

      if (subpass->resolve_attachments &&
          subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, subpass->resolve_attachments[i])) {
         return false;
      }
   }

   /* No chance of exceeding PBE registers in a single subpass. */
   if (ctx->hw_render) {
      ret = pvr_exceeds_pbe_registers(ctx, subpass);
      if (ret)
         return false;
   }

   /* Check we can allocate storage for the new subpass's color attachments and
    * any z replications.
    */
   result = pvr_is_subpass_space_available(dev_info,
                                           ctx,
                                           subpass,
                                           sp_depth,
                                           new_alloc,
                                           sp_dsts);
   if (result != VK_SUCCESS)
      return false;

   return true;
}
1965 
/* Merge 'input_subpass' into the in-progress hardware render.
 *
 * If the subpass can't be combined with the current render (depth/stencil
 * conflicts, sample count mismatch, dependency flushes, PBE/storage limits)
 * the current render is closed and a fresh one is started. On success a new
 * hardware subpass is appended and returned through 'hw_subpass_out', and
 * the context's allocation state is replaced with the state including this
 * subpass's reservations.
 */
static VkResult
pvr_merge_subpass(const struct pvr_device *device,
                  struct pvr_renderpass_context *ctx,
                  struct pvr_render_subpass *input_subpass,
                  struct pvr_renderpass_hwsetup_subpass **const hw_subpass_out)
{
   struct pvr_renderpass_hwsetup_subpass *new_hw_subpasses;
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_subpass_depth_params sp_depth;
   struct pvr_renderpass_subpass *new_subpasses;
   struct pvr_render_int_subpass_dsts sp_dsts;
   struct pvr_renderpass_subpass *subpass;
   struct pvr_renderpass_alloc alloc;
   VkResult result;
   bool ret;

   /* Depth attachment for the incoming subpass. */
   if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
      int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
   else
      int_ds_attach = NULL;

   /* Attachment ID for the existing depth attachment. */
   if (ctx->int_ds_attach)
      sp_depth.existing_ds_attach = ctx->int_ds_attach - ctx->int_attach;
   else
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

   /* Is the incoming depth attachment used as an input to the incoming subpass?
    */
   sp_depth.incoming_ds_is_input =
      pvr_is_input(input_subpass, input_subpass->depth_stencil_attachment);

   /* Is the current depth attachment used as an input to the incoming subpass?
    */
   sp_depth.existing_ds_is_input =
      pvr_is_input(input_subpass, sp_depth.existing_ds_attach);

   /* Can the incoming subpass be combined with the existing render? Also checks
    * if space is available for the subpass results and return the allocated
    * locations.
    */
   ret = pvr_can_combine_with_render(&device->pdevice->dev_info,
                                     ctx,
                                     input_subpass,
                                     &sp_depth,
                                     int_ds_attach,
                                     &alloc,
                                     &sp_dsts);
   if (!ret) {
      /* Flush the current render; the subpass will start a fresh one. */
      result = pvr_close_render(device, ctx);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;

      /* The new render has no depth attachment yet. */
      sp_depth.existing_ds_is_input = false;
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

      /* Allocate again in a new render. */
      result = pvr_is_subpass_space_available(&device->pdevice->dev_info,
                                              ctx,
                                              input_subpass,
                                              &sp_depth,
                                              &alloc,
                                              &sp_dsts);
      /* A single subpass can always fit in an empty render without tile
       * buffers, so TOO_MANY_OBJECTS is impossible here.
       */
      assert(result != VK_ERROR_TOO_MANY_OBJECTS);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* If there isn't an in-progress render then allocate one. */
   if (!ctx->hw_render) {
      struct pvr_renderpass_hwsetup *hw_setup = ctx->hw_setup;
      struct pvr_renderpass_hwsetup_render *new_hw_render = vk_realloc(
         ctx->allocator,
         hw_setup->renders,
         sizeof(hw_setup->renders[0U]) * (hw_setup->render_count + 1U),
         8U,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_hw_render) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto end_merge_subpass;
      }

      hw_setup->renders = new_hw_render;

      ctx->hw_render = &hw_setup->renders[hw_setup->render_count];
      /* sizeof(*hw_render) is the struct size; hw_render isn't read here. */
      memset(ctx->hw_render, 0U, sizeof(*hw_render));
      ctx->hw_render->ds_attach_idx = VK_ATTACHMENT_UNUSED;
      hw_setup->render_count++;
      ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->sample_count = input_subpass->sample_count;
   }

   /* Allocate a new subpass in the in-progress render. */
   hw_render = ctx->hw_render;

   new_hw_subpasses = vk_realloc(ctx->allocator,
                                 hw_render->subpasses,
                                 sizeof(hw_render->subpasses[0U]) *
                                    (hw_render->subpass_count + 1U),
                                 8U,
                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!new_hw_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   hw_render->subpasses = new_hw_subpasses;
   hw_subpass = &hw_render->subpasses[hw_render->subpass_count];

   /* Grow the parallel array of per-subpass scheduling state. */
   new_subpasses =
      vk_realloc(ctx->allocator,
                 ctx->subpasses,
                 sizeof(ctx->subpasses[0U]) * (hw_render->subpass_count + 1U),
                 8U,
                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!new_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   ctx->subpasses = new_subpasses;

   subpass = &ctx->subpasses[hw_render->subpass_count];
   subpass->input_subpass = input_subpass;
   subpass->z_replicate = false;

   /* Save the allocation state at the subpass. */
   result = pvr_copy_alloc(ctx, &subpass->alloc, &alloc);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   hw_render->subpass_count++;

   memset(hw_subpass, 0U, sizeof(*hw_subpass));
   hw_subpass->index = input_subpass->index;
   hw_subpass->z_replicate = -1;
   hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_DONT_CARE;

   /* The render is switching to a new depth/stencil attachment: set up its
    * load/clear operations.
    */
   if (int_ds_attach && ctx->int_ds_attach != int_ds_attach) {
      bool setup_render_ds = false;
      bool stencil_load = false;
      bool depth_load = false;

      if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
         depth_load = true;
         setup_render_ds = true;
         hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_LOAD;

         /* Only one depth load surface per render. */
         assert(!ctx->ds_load_surface);
         ctx->ds_load_surface = int_ds_attach;
      } else if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_CLEAR;
      }

      if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         if (int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
            stencil_load = true;
            setup_render_ds = true;
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         } else if (int_ds_attach->stencil_load_op ==
                    VK_ATTACHMENT_LOAD_OP_CLEAR) {
            hw_subpass->stencil_clear = true;
         }
      }

      /* If the depth is loaded then allocate external memory for the depth
       * attachment.
       */
      if (depth_load || stencil_load)
         pvr_mark_surface_alloc(ctx, int_ds_attach);

      if (setup_render_ds) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;
      }

      ctx->int_ds_attach = int_ds_attach;
   }

   /* Set up the initialization operations for subpasses. */
   hw_subpass->color_initops = vk_alloc(ctx->allocator,
                                        sizeof(hw_subpass->color_initops[0U]) *
                                           input_subpass->color_count,
                                        8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->color_initops) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      /* first_use == -1 means this subpass is the first to use the
       * attachment in this render.
       */
      if (int_attach->first_use == -1) {
         hw_subpass->color_initops[i] = int_attach->load_op;

         /* If the attachment is loaded then off-chip memory must be
          * allocated for it.
          */
         if (int_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
            pvr_mark_surface_alloc(ctx, int_attach);

         /* The attachment has been written so load the attachment the next
          * time it is referenced.
          */
         int_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      } else {
         hw_subpass->color_initops[i] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      }
   }

   /* Copy the destinations allocated for the color attachments. */
   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;
      struct pvr_renderpass_resource *attach_dst;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];
      attach_dst = &sp_dsts.color[i];

      if (int_attach->first_use == -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         assert(attach_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_set_surface_resource(int_attach, attach_dst);

         /* If this attachment is being used for the first time then add it
          * to the active list.
          */
         pvr_make_surface_active(ctx,
                                 int_attach,
                                 hw_render->subpass_count - 1U);
      } else {
         /* Already placed in a previous subpass; no new destination. */
         assert(attach_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
      }
   }

   /* We can't directly read the on-chip depth so mark subpasses where the depth
    * is written to replicate the value into part of the color storage.
    */
   if (sp_depth.existing_ds_is_input) {
      result = pvr_enable_z_replicate(ctx,
                                      hw_render,
                                      sp_depth.existing_ds_attach,
                                      &sp_dsts.existing_zrep);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   if (sp_depth.incoming_ds_is_input) {
      if (input_subpass->depth_stencil_attachment !=
          sp_depth.existing_ds_attach) {
         result =
            pvr_enable_z_replicate(ctx,
                                   hw_render,
                                   input_subpass->depth_stencil_attachment,
                                   &sp_dsts.incoming_zrep);
         if (result != VK_SUCCESS)
            goto end_merge_subpass;
      } else {
         /* Same attachment: both z-replicate destinations must agree. */
         assert(pvr_equal_resources(&sp_dsts.existing_zrep,
                                    &sp_dsts.incoming_zrep));
      }
   }

   /* Copy the locations of color/input attachments to the output structure.
    * N.B. Need to do this after Z replication in case the replicated depth is
    * an input attachment for the incoming subpass.
    */
   result = pvr_copy_storage_details(ctx, hw_subpass, subpass);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   if (subpass->z_replicate) {
      result = pvr_copy_z_replicate_details(ctx, hw_subpass, subpass);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* Copy the allocation at the subpass. This will then be updated if this was
    * last use of any attachment.
    */
   pvr_free_alloc(ctx, &ctx->alloc);
   ctx->alloc = alloc;

   /* Free information about subpass destinations. */
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   *hw_subpass_out = hw_subpass;

   return VK_SUCCESS;

end_merge_subpass:
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   pvr_free_alloc(ctx, &alloc);

   return result;
}
2280 
2281 static void
pvr_dereference_color_output_list(struct pvr_renderpass_context * ctx,uint32_t subpass_num,struct pvr_render_subpass * subpass)2282 pvr_dereference_color_output_list(struct pvr_renderpass_context *ctx,
2283                                   uint32_t subpass_num,
2284                                   struct pvr_render_subpass *subpass)
2285 {
2286    for (uint32_t i = 0U; i < subpass->color_count; i++) {
2287       const uint32_t attach_idx = subpass->color_attachments[i];
2288 
2289       if (attach_idx != VK_ATTACHMENT_UNUSED)
2290          pvr_dereference_surface(ctx, attach_idx, subpass_num);
2291    }
2292 }
2293 
pvr_dereference_surface_list(struct pvr_renderpass_context * ctx,uint32_t subpass_num,uint32_t * attachments,uint32_t count)2294 static void pvr_dereference_surface_list(struct pvr_renderpass_context *ctx,
2295                                          uint32_t subpass_num,
2296                                          uint32_t *attachments,
2297                                          uint32_t count)
2298 {
2299    for (uint32_t i = 0U; i < count; i++) {
2300       if (attachments[i] != VK_ATTACHMENT_UNUSED)
2301          pvr_dereference_surface(ctx, attachments[i], subpass_num);
2302    }
2303 }
2304 
/* Schedule API subpass 'subpass_idx' into the hardware setup: merge it into
 * the current hardware render (pvr_merge_subpass may start a new render),
 * record which (hw render, hw subpass) it landed in, then update
 * per-attachment reference counts, load ops, MSAA-resolve markers and the
 * unscheduled-dependency counts of downstream subpasses.
 *
 * Returns VK_SUCCESS, or the error propagated from pvr_merge_subpass.
 */
static VkResult pvr_schedule_subpass(const struct pvr_device *device,
                                     struct pvr_renderpass_context *ctx,
                                     uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_int_subpass *int_subpass;
   struct pvr_render_subpass *subpass;
   uint32_t subpass_num;
   VkResult result;

   int_subpass = &ctx->int_subpasses[subpass_idx];
   subpass = int_subpass->subpass;

   result = pvr_merge_subpass(device, ctx, subpass, &hw_subpass);
   if (result != VK_SUCCESS)
      return result;

   /* pvr_merge_subpass appended the subpass to the current hw render, so it
    * is the last entry in that render.
    */
   hw_render = ctx->hw_render;
   subpass_num = hw_render->subpass_count - 1U;

   /* Record where the subpass was scheduled. */
   ctx->hw_setup->subpass_map[subpass_idx].render =
      ctx->hw_setup->render_count - 1U;
   ctx->hw_setup->subpass_map[subpass_idx].subpass = subpass_num;

   /* Check this subpass was the last use of any attachments. */
   pvr_dereference_color_output_list(ctx, subpass_num, subpass);
   pvr_dereference_surface_list(ctx,
                                subpass_num,
                                subpass->input_attachments,
                                subpass->input_count);
   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      struct pvr_render_int_attachment *int_depth_attach =
         &ctx->int_attach[subpass->depth_stencil_attachment];

      assert(int_depth_attach->remaining_count > 0U);
      int_depth_attach->remaining_count--;

      if (int_depth_attach->remaining_count == 0U) {
         /* first_use == -1 means the attachment was never actually used in a
          * scheduled subpass, so there is no last use to record.
          */
         if (int_depth_attach->first_use != -1)
            int_depth_attach->last_use = subpass_num;

         /* Replicated depth no longer needed: release its color storage. */
         if (int_depth_attach->z_replicate)
            pvr_free_surface_storage(ctx, int_depth_attach);
      }

      if (int_depth_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         assert(int_depth_attach->stencil_remaining_count > 0U);
         int_depth_attach->stencil_remaining_count--;
      }

      /* The depth attachment has initialized data so load it from memory if it
       * is referenced again.
       */
      int_depth_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      int_depth_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   /* Mark surfaces which have been the source or destination of an MSAA resolve
    * in the current render.
    */
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      struct pvr_render_int_attachment *resolve_src;
      struct pvr_render_int_attachment *resolve_dst;

      if (!subpass->resolve_attachments)
         break;

      if (subpass->resolve_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      assert(subpass->color_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_src = &ctx->int_attach[subpass->color_attachments[i]];

      assert(subpass->resolve_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_dst = &ctx->int_attach[subpass->resolve_attachments[i]];

      /* Mark the resolve source. */
      assert(resolve_src->last_resolve_src_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_src->last_resolve_src_render = ctx->hw_setup->render_count - 1U;

      /* Mark the resolve destination. */
      assert(resolve_dst->last_resolve_dst_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_dst->last_resolve_dst_render = ctx->hw_setup->render_count - 1U;

      /* If we can't down scale through the PBE then the src must be stored
       * for transfer down scale.
       */
      if (!resolve_src->is_pbe_downscalable &&
          resolve_src->last_read < (int32_t)ctx->pass->subpass_count) {
         resolve_src->last_read = (int32_t)ctx->pass->subpass_count;
         resolve_src->remaining_count++;
      }
   }

   /* For subpasses dependent on this subpass decrement the unscheduled
    * dependency count.
    */
   for (uint32_t i = 0U; i < int_subpass->out_subpass_count; i++) {
      struct pvr_render_int_subpass *int_dst_subpass =
         int_subpass->out_subpasses[i];

      assert(int_dst_subpass->in_subpass_count > 0U);
      int_dst_subpass->in_subpass_count--;
   }

   return VK_SUCCESS;
}
2418 
/* Count how many entries in 'attachments' (length 'size') equal 'attach_idx'.
 *
 * Returns 0 for an empty list. The list is only read, so the parameter is
 * const-qualified; existing callers passing uint32_t * are unaffected.
 */
static uint32_t pvr_count_uses_in_list(const uint32_t *attachments,
                                       uint32_t size,
                                       uint32_t attach_idx)
{
   uint32_t count = 0U;

   for (uint32_t i = 0U; i < size; i++) {
      if (attachments[i] == attach_idx)
         count++;
   }

   return count;
}
2432 
2433 static uint32_t
pvr_count_uses_in_color_output_list(struct pvr_render_subpass * subpass,uint32_t attach_idx)2434 pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
2435                                     uint32_t attach_idx)
2436 {
2437    uint32_t count = 0U;
2438 
2439    for (uint32_t i = 0U; i < subpass->color_count; i++) {
2440       if (subpass->color_attachments[i] == attach_idx) {
2441          count++;
2442 
2443          if (subpass->resolve_attachments &&
2444              subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
2445             count++;
2446       }
2447    }
2448 
2449    return count;
2450 }
2451 
pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks * alloc,struct pvr_renderpass_hwsetup * hw_setup)2452 void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
2453                                     struct pvr_renderpass_hwsetup *hw_setup)
2454 {
2455    for (uint32_t i = 0U; i < hw_setup->render_count; i++) {
2456       struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
2457 
2458       vk_free(alloc, hw_render->eot_surfaces);
2459       vk_free(alloc, hw_render->eot_setup.mrt_resources);
2460       vk_free(alloc, hw_render->init_setup.mrt_resources);
2461       vk_free(alloc, hw_render->color_init);
2462 
2463       for (uint32_t j = 0U; j < hw_render->subpass_count; j++) {
2464          struct pvr_renderpass_hwsetup_subpass *subpass =
2465             &hw_render->subpasses[j];
2466 
2467          vk_free(alloc, subpass->color_initops);
2468          vk_free(alloc, subpass->input_access);
2469          vk_free(alloc, subpass->setup.mrt_resources);
2470       }
2471 
2472       vk_free(alloc, hw_render->subpasses);
2473    }
2474 
2475    vk_free(alloc, hw_setup->renders);
2476    vk_free(alloc, hw_setup);
2477 }
2478 
/* Lower the API-level render pass 'pass' to a hardware setup: build internal
 * per-attachment and per-subpass bookkeeping, construct the reverse
 * dependency graph, then schedule subpasses in topological order into one or
 * more hardware renders.
 *
 * device        - device used for error reporting and by the scheduling
 *                 helpers.
 * alloc         - allocator for the returned setup and temporary state.
 * pass          - the render pass to lower.
 * disable_merge - when true, close the hardware render after every subpass so
 *                 no two subpasses share a render.
 * hw_setup_out  - on success receives the new setup; free it with
 *                 pvr_destroy_renderpass_hwsetup().
 *
 * Returns VK_SUCCESS, VK_ERROR_OUT_OF_HOST_MEMORY, or an error propagated
 * from pvr_schedule_subpass()/pvr_close_render().
 */
VkResult pvr_create_renderpass_hwsetup(
   struct pvr_device *device,
   const VkAllocationCallbacks *alloc,
   struct pvr_render_pass *pass,
   bool disable_merge,
   struct pvr_renderpass_hwsetup **const hw_setup_out)
{
   struct pvr_render_int_attachment *int_attachments;
   struct pvr_render_int_subpass *int_subpasses;
   struct pvr_renderpass_hw_map *subpass_map;
   struct pvr_renderpass_hwsetup *hw_setup;
   struct pvr_renderpass_context *ctx;
   bool *surface_allocate;
   VkResult result;

   /* The returned object: setup struct plus its per-attachment and
    * per-subpass arrays in one zeroed allocation.
    */
   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
   vk_multialloc_add(&ma,
                     &surface_allocate,
                     __typeof__(*surface_allocate),
                     pass->attachment_count);
   vk_multialloc_add(&ma,
                     &subpass_map,
                     __typeof__(*subpass_map),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   hw_setup->surface_allocate = surface_allocate;
   hw_setup->subpass_map = subpass_map;

   /* Temporary scheduling context (COMMAND scope): freed before returning. */
   VK_MULTIALLOC(ma_ctx);
   vk_multialloc_add(&ma_ctx, &ctx, __typeof__(*ctx), 1);
   vk_multialloc_add(&ma_ctx,
                     &int_attachments,
                     __typeof__(*int_attachments),
                     pass->attachment_count);
   vk_multialloc_add(&ma_ctx,
                     &int_subpasses,
                     __typeof__(*int_subpasses),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma_ctx,
                             alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_free(alloc, hw_setup);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   ctx->pass = pass;
   ctx->hw_setup = hw_setup;
   ctx->int_attach = int_attachments;
   ctx->int_subpasses = int_subpasses;
   ctx->allocator = alloc;

   /* Initialize the internal state for every attachment. */
   for (uint32_t i = 0U; i < pass->attachment_count; i++) {
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
      struct pvr_render_int_attachment *int_attach = &ctx->int_attach[i];
      const VkFormat format = attachment->vk_format;
      uint32_t pixel_size_in_chunks;
      uint32_t pixel_size_in_bits;

      /* TODO: Add support for packing multiple attachments into the same
       * register.
       */
      const uint32_t part_bits = 0;

      if (vk_format_is_color(format) &&
          pvr_get_pbe_accum_format(attachment->vk_format) ==
             PVR_PBE_ACCUM_FORMAT_INVALID) {
         /* The VkFormat is not supported as a color attachment so `0`.
          * Vulkan doesn't seems to restrict vkCreateRenderPass() to supported
          * formats only.
          */
         pixel_size_in_bits = 0;
      } else {
         pixel_size_in_bits =
            pvr_get_accum_format_bitsize(attachment->vk_format);
      }

      int_attach->resource.type = USC_MRT_RESOURCE_TYPE_INVALID;
      int_attach->resource.intermediate_size =
         DIV_ROUND_UP(pixel_size_in_bits, CHAR_BIT);
      int_attach->resource.mrt_desc.intermediate_size =
         int_attach->resource.intermediate_size;

      /* Mark every fully-used 32-bit chunk of the pixel as valid. */
      pixel_size_in_chunks = DIV_ROUND_UP(pixel_size_in_bits, 32U);
      for (uint32_t j = 0U; j < pixel_size_in_chunks; j++)
         int_attach->resource.mrt_desc.valid_mask[j] = ~0;

      /* Dead with part_bits == 0 above; kept for the packing TODO. */
      if (part_bits > 0U) {
         int_attach->resource.mrt_desc.valid_mask[pixel_size_in_chunks] =
            BITFIELD_MASK(part_bits);
      }

      int_attach->load_op = pass->attachments[i].load_op;
      int_attach->stencil_load_op = pass->attachments[i].stencil_load_op;
      int_attach->attachment = attachment;
      /* -1 == "not yet seen"/"no such event" for all of these trackers. */
      int_attach->first_use = -1;
      int_attach->last_use = -1;
      int_attach->last_read = -1;
      int_attach->mrt_idx = -1;
      int_attach->last_resolve_dst_render = -1;
      int_attach->last_resolve_src_render = -1;
      int_attach->z_replicate = false;
      int_attach->is_pbe_downscalable = attachment->is_pbe_downscalable;

      /* Count the number of references to this attachment in subpasses. */
      for (uint32_t j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_subpass *subpass = &pass->subpasses[j];
         const uint32_t color_output_uses =
            pvr_count_uses_in_color_output_list(subpass, i);
         const uint32_t input_attachment_uses =
            pvr_count_uses_in_list(subpass->input_attachments,
                                   subpass->input_count,
                                   i);

         if (color_output_uses != 0U || input_attachment_uses != 0U)
            int_attach->last_read = j;

         int_attach->remaining_count +=
            color_output_uses + input_attachment_uses;

         if ((uint32_t)subpass->depth_stencil_attachment == i)
            int_attach->remaining_count++;
      }

      /* Stencil uses mirror the combined count; a stencil store at the end of
       * the pass counts as one extra use.
       */
      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         int_attach->stencil_remaining_count = int_attach->remaining_count;
         if (pass->attachments[i].stencil_store_op ==
             VK_ATTACHMENT_STORE_OP_STORE) {
            int_attach->stencil_remaining_count++;
         }
      }

      /* A store at the end of the pass is an extra use past the last
       * subpass.
       */
      if (pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         int_attach->remaining_count++;
         int_attach->last_read = pass->subpass_count;
      }
   }

   /* Initialize the internal state for every subpass; in_subpass_count is the
    * number of still-unscheduled dependencies.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      int_subpass->subpass = &pass->subpasses[i];
      int_subpass->out_subpass_count = 0U;
      int_subpass->out_subpasses = NULL;
      int_subpass->in_subpass_count = int_subpass->subpass->dep_count;
   }

   /* For each dependency of a subpass create an edge in the opposite
    * direction.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      for (uint32_t j = 0U; j < int_subpass->in_subpass_count; j++) {
         uint32_t src_idx = int_subpass->subpass->dep_list[j];
         struct pvr_render_int_subpass *int_src_subpass;
         struct pvr_render_int_subpass **out_subpasses;

         assert(src_idx < pass->subpass_count);

         int_src_subpass = &ctx->int_subpasses[src_idx];

         /* Grow the source's out-edge array by one slot. */
         out_subpasses =
            vk_realloc(ctx->allocator,
                       int_src_subpass->out_subpasses,
                       sizeof(int_src_subpass->out_subpasses[0U]) *
                          (int_src_subpass->out_subpass_count + 1U),
                       8U,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!out_subpasses) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto end_create_renderpass_hwsetup;
         }

         int_src_subpass->out_subpasses = out_subpasses;
         int_src_subpass->out_subpasses[int_src_subpass->out_subpass_count] =
            int_subpass;
         int_src_subpass->out_subpass_count++;
      }
   }

   pvr_reset_render(ctx);

   /* Schedule all subpasses in topological order: each iteration picks one
    * subpass whose dependencies have all been scheduled.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      uint32_t j;

      /* Find a subpass with no unscheduled dependencies. */
      for (j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[j];

         if (int_subpass->subpass && int_subpass->in_subpass_count == 0U)
            break;
      }
      /* A valid render pass dependency graph is acyclic, so one must exist. */
      assert(j < pass->subpass_count);

      result = pvr_schedule_subpass(device, ctx, j);
      if (result != VK_SUCCESS)
         goto end_create_renderpass_hwsetup;

      if (disable_merge) {
         result = pvr_close_render(device, ctx);
         if (result != VK_SUCCESS)
            goto end_create_renderpass_hwsetup;
      }

      /* NULL marks the subpass as scheduled for the search above. */
      ctx->int_subpasses[j].subpass = NULL;
   }

   /* Finalise the last in-progress render. */
   result = pvr_close_render(device, ctx);

end_create_renderpass_hwsetup:
   /* 'result' is always assigned before this label is reached. */
   if (result != VK_SUCCESS) {
      pvr_free_render(ctx);

      if (hw_setup) {
         pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
         hw_setup = NULL;
      }
   }

   /* Free the reverse dependency edges built above. */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      if (int_subpass->out_subpass_count > 0U)
         vk_free(alloc, int_subpass->out_subpasses);
   }

   vk_free(alloc, ctx);

   /* NULL on failure. */
   *hw_setup_out = hw_setup;

   return result;
}
2717