/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <assert.h>
#include <limits.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>
#include <vulkan/vulkan.h>

#include "hwdef/rogue_hw_defs.h"
#include "hwdef/rogue_hw_utils.h"
#include "pvr_hw_pass.h"
#include "pvr_formats.h"
#include "pvr_private.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/macros.h"
#include "util/u_math.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"

struct pvr_render_int_subpass {
   /* Points to the input subpass. This is set to NULL when the subpass is
    * scheduled.
    */
   struct pvr_render_subpass *subpass;

   /* Count of other subpasses which have this subpass as a dependency. */
   uint32_t out_subpass_count;

   /* Pointers to the other subpasses which have this subpass as a dependency.
    */
   struct pvr_render_int_subpass **out_subpasses;

   /* Count of subpasses on which this subpass is dependent and which haven't
    * been scheduled yet.
    */
   uint32_t in_subpass_count;
};

struct pvr_renderpass_resource {
   /* Resource type allocated for render target. */
   enum usc_mrt_resource_type type;

   union {
      /* If type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG. */
      struct {
         /* The output register to use. */
         uint32_t output_reg;

         /* The offset in bytes within the output register. */
         uint32_t offset;
      } reg;

      /* If type == USC_MRT_RESOURCE_TYPE_MEMORY. */
      struct {
         /* The index of the tile buffer to use. */
         uint32_t tile_buffer;

         /* The offset (in dwords) within the tile buffer. */
         uint32_t offset_dw;
      } mem;
   };
};

struct pvr_render_int_attachment {
   /* Points to the corresponding input attachment. */
   struct pvr_render_pass_attachment *attachment;

   /* True if this attachment is referenced in the currently open render. */
   bool is_used;

   /* Operation to use when this attachment is non-resident and referenced as a
    * color or depth attachment.
    */
   VkAttachmentLoadOp load_op;

   /* Operation to use for the stencil component when this attachment is
    * non-resident and referenced as a color or depth attachment.
    */
   VkAttachmentLoadOp stencil_load_op;

   /* Count of uses of this attachment in unscheduled subpasses. */
   uint32_t remaining_count;

   /* Count of uses of the stencil component of this attachment in unscheduled
    * subpasses.
    */
   uint32_t stencil_remaining_count;

   /* If this attachment currently has allocated on-chip storage then details
    * of the allocated location.
    */
   struct usc_mrt_resource resource;

   /* Index of the subpass in the current render where the attachment is first
    * used. VK_ATTACHMENT_UNUSED if the attachment isn't used in the current
    * render.
    */
   int32_t first_use;

   /* Index of the subpass in the current render where the attachment is last
    * used.
    */
   int32_t last_use;

   /* Index of the subpass (global) where the attachment is last read. */
   int32_t last_read;

   /* If this attachment currently has allocated on-chip storage then the entry
    * in context.active_surf_list.
    */
   struct list_head link;

   /* During pvr_close_render: if this attachment has allocated on-chip storage
    * then the index in pvr_renderpass_hwsetup_render.eot_setup.mrt_resources
    * with details of the storage location. Otherwise -1.
    */
   int32_t mrt_idx;

   /* Index of the last render where the attachment was the source of an MSAA
    * resolve.
    */
   int32_t last_resolve_src_render;

   /* Index of the last render where the attachment was the destination of an
    * MSAA resolve.
    */
   int32_t last_resolve_dst_render;

   /* true if the attachment is used with a z replicate in the current render.
    */
   bool z_replicate;

   /* true if this attachment can be resolved by the PBE. */
   bool is_pbe_downscalable;

   /* true if this attachment requires an EOT attachment. */
   bool eot_surf_required;
};

/* Which parts of the output registers/a tile buffer are currently allocated. */
struct pvr_renderpass_alloc_buffer {
   /* Bit array. A bit is set if the corresponding dword is allocated. */
   BITSET_DECLARE(allocs, 8U);
};

struct pvr_renderpass_alloc {
   /* Which pixel output registers are allocated. */
   struct pvr_renderpass_alloc_buffer output_reg;

   /* Range of allocated output registers. */
   uint32_t output_regs_count;

   /* Number of tile buffers allocated. */
   uint32_t tile_buffers_count;

   /* Which parts of each tile buffer are allocated. Length is
    * tile_buffers_count.
    */
   struct pvr_renderpass_alloc_buffer *tile_buffers;
};

struct pvr_renderpass_subpass {
   /* A pointer to the input subpass description. */
   struct pvr_render_subpass *input_subpass;

   /* true if the depth attachment for this subpass has z replication enabled.
    */
   bool z_replicate;

   /* Which pixel output registers/tile buffer locations are allocated during
    * this subpass.
    */
   struct pvr_renderpass_alloc alloc;
};

struct pvr_renderpass_context {
   /* Internal information about each input attachment. */
   struct pvr_render_int_attachment *int_attach;

   /* Internal information about each input subpass. */
   struct pvr_render_int_subpass *int_subpasses;

   /* Input structure. */
   struct pvr_render_pass *pass;

   /* Output structure. */
   struct pvr_renderpass_hwsetup *hw_setup;

   /* In-progress render. */
   struct pvr_renderpass_hwsetup_render *hw_render;

   /* Information about each subpass in the current render. */
   struct pvr_renderpass_subpass *subpasses;

   /* Which parts of color storage are currently allocated. */
   struct pvr_renderpass_alloc alloc;

   /* Attachment which is currently allocated the on-chip depth/stencil. */
   struct pvr_render_int_attachment *int_ds_attach;

   /* Attachment which is loaded into the on-chip depth/stencil at the start of
    * the render.
    */
   struct pvr_render_int_attachment *ds_load_surface;

   /* Attachment which the depth/stencil attachment should be resolved to at the
    * end of the render.
    */
   struct pvr_render_int_attachment *ds_resolve_surface;

   /* Count of surfaces which are allocated on-chip color storage. */
   uint32_t active_surfaces;

   /* List of attachment/ranges which are allocated on-chip color storage. */
   struct list_head active_surf_list;

   const VkAllocationCallbacks *allocator;
};

struct pvr_render_int_subpass_dsts {
   struct pvr_renderpass_resource *color;
   struct pvr_renderpass_resource incoming_zrep;
   struct pvr_renderpass_resource existing_zrep;
};

struct pvr_render_subpass_depth_params {
   bool existing_ds_is_input;
   bool incoming_ds_is_input;
   uint32_t existing_ds_attach;
};

struct pvr_renderpass_storage_firstuse_buffer {
   /* For each pixel output register/tile buffer location: true if the output
    * register has been allocated in the current render.
    */
   bool used[8U];
};

struct pvr_renderpass_storage_firstuse {
   /* First use information for pixel output registers. */
   struct pvr_renderpass_storage_firstuse_buffer output_reg;

   /* First use information for tile buffers. */
   struct pvr_renderpass_storage_firstuse_buffer *tile_buffers;
};

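/** Returns the size in bits of the intermediate on-chip storage format for an
 * attachment. Depth formats use the full block size; stencil-only formats
 * return 0.
 */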
static uint32_t pvr_get_accum_format_bitsize(VkFormat vk_format)
{
   if (util_format_has_depth(vk_format_description(vk_format)))
      return vk_format_get_blocksizebits(vk_format);

   if (!vk_format_has_stencil(vk_format))
      return pvr_get_pbe_accum_format_size_in_bytes(vk_format) * 8;

   return 0;
}

/** Copy information about allocated color storage. */
static VkResult pvr_copy_alloc(struct pvr_renderpass_context *ctx,
                               struct pvr_renderpass_alloc *dst,
                               struct pvr_renderpass_alloc *src)
{
   dst->output_reg = src->output_reg;
   dst->output_regs_count = src->output_regs_count;

   dst->tile_buffers_count = src->tile_buffers_count;
   if (dst->tile_buffers_count > 0U) {
      dst->tile_buffers =
         vk_alloc(ctx->allocator,
                  sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!dst->tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      memcpy(dst->tile_buffers,
             src->tile_buffers,
             sizeof(dst->tile_buffers[0U]) * dst->tile_buffers_count);
   } else {
      dst->tile_buffers = NULL;
   }

   return VK_SUCCESS;
}

/** Free information about allocated color storage. */
static void pvr_free_alloc(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc)
{
   if (alloc->tile_buffers)
      vk_free(ctx->allocator, alloc->tile_buffers);

   memset(alloc, 0U, sizeof(*alloc));
}

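/** Resets the context state which tracks the in-progress render. */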
static void pvr_reset_render(struct pvr_renderpass_context *ctx)
{
   ctx->int_ds_attach = NULL;
   ctx->active_surfaces = 0U;
   list_inithead(&ctx->active_surf_list);

   memset(&ctx->alloc.output_reg, 0U, sizeof(ctx->alloc.output_reg));
   ctx->alloc.output_regs_count = 0U;
   ctx->alloc.tile_buffers_count = 0U;
   ctx->alloc.tile_buffers = NULL;

   ctx->hw_render = NULL;
   ctx->subpasses = NULL;
   ctx->ds_load_surface = NULL;
}

/** Gets the amount of memory to allocate per-core for a tile buffer. */
static uint32_t
pvr_get_tile_buffer_size_per_core(const struct pvr_device *device)
{
   uint32_t clusters =
      PVR_GET_FEATURE_VALUE(&device->pdevice->dev_info, num_clusters, 1U);

   /* Round the number of clusters up to the next power of two. */
   if (!PVR_HAS_FEATURE(&device->pdevice->dev_info, tile_per_usc))
      clusters = util_next_power_of_two(clusters);

   /* Tile buffer is (total number of partitions across all clusters) * 16 * 16
    * (quadrant size in pixels).
    */
   return device->pdevice->dev_runtime_info.total_reserved_partition_size *
          clusters * sizeof(uint32_t);
}

/**
 * Gets the amount of memory to allocate for a tile buffer on the current BVNC.
 */
uint32_t pvr_get_tile_buffer_size(const struct pvr_device *device)
{
   /* On a multicore system duplicate the buffer for each core. */
   return pvr_get_tile_buffer_size_per_core(device) *
          rogue_get_max_num_cores(&device->pdevice->dev_info);
}

static void
pvr_finalise_mrt_setup(const struct pvr_device *device,
                       struct pvr_renderpass_hwsetup_render *hw_render,
                       struct usc_mrt_setup *mrt)
{
   mrt->num_output_regs = hw_render->output_regs_count;
   mrt->num_tile_buffers = hw_render->tile_buffers_count;
   mrt->tile_buffer_size = pvr_get_tile_buffer_size(device);
}

/**
 * Copy information about the number of pixel output registers and tile buffers
 * required for the current render to the output structure.
 */
static void pvr_finalise_po_alloc(const struct pvr_device *device,
                                  struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;

   /* The number of output registers must be a power of two. */
   hw_render->output_regs_count =
      util_next_power_of_two(ctx->alloc.output_regs_count);

   assert(ctx->alloc.tile_buffers_count <= ctx->pass->max_tilebuffer_count);
   hw_render->tile_buffers_count = ctx->alloc.tile_buffers_count;

   /* Copy the number of output registers and tile buffers to each subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];

      pvr_finalise_mrt_setup(device, hw_render, &hw_subpass->setup);
   }

   pvr_finalise_mrt_setup(device, hw_render, &hw_render->init_setup);
   pvr_finalise_mrt_setup(device, hw_render, &hw_render->eot_setup);
}

/** Mark that device memory must be allocated for an attachment. */
static void pvr_mark_surface_alloc(struct pvr_renderpass_context *ctx,
                                   struct pvr_render_int_attachment *int_attach)
{
   const uint32_t attach_idx = int_attach - ctx->int_attach;

   assert(attach_idx < ctx->pass->attachment_count);
   ctx->hw_setup->surface_allocate[attach_idx] = true;
}

/**
 * Check if there is space in a buffer for storing a render target of a
 * specified size.
 */
static int32_t
pvr_is_space_in_buffer(const struct pvr_device_info *dev_info,
                       struct pvr_renderpass_alloc_buffer *buffer,
                       uint32_t pixel_size)
{
   const uint32_t max_out_regs = rogue_get_max_output_regs_per_pixel(dev_info);
   uint32_t alignment = 1U;

   if (PVR_HAS_FEATURE(dev_info, pbe2_in_xe)) {
      /* For a 64-bit/128-bit source format: the start offset must be even. */
      if (pixel_size == 2U || pixel_size == 4U)
         alignment = 2U;
   }

   assert(pixel_size <= max_out_regs);

   for (uint32_t i = 0U; i <= (max_out_regs - pixel_size); i += alignment) {
      if (!BITSET_TEST_RANGE(buffer->allocs, i, i + pixel_size - 1U))
         return i;
   }

   return -1;
}

static VkResult
pvr_surface_setup_render_init(struct pvr_renderpass_context *ctx,
                              struct pvr_renderpass_storage_firstuse *first_use,
                              struct usc_mrt_resource const *resource,
                              struct pvr_render_pass_attachment *attachment,
                              VkAttachmentLoadOp load_op,
                              bool *use_render_init)
{
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse_buffer *buffer;
   uint32_t start;

   /* Check if this is the first use of all the allocated registers. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      buffer = &first_use->output_reg;
      start = resource->reg.output_reg;
   } else {
      assert(resource->mem.tile_buffer < ctx->alloc.tile_buffers_count);
      buffer = &first_use->tile_buffers[resource->mem.tile_buffer];
      start = resource->mem.offset_dw;
   }

   *use_render_init = true;
   for (uint32_t i = 0U; i < pixel_size; i++) {
      /* Don't initialize at the render level if the output registers were
       * previously allocated a different attachment.
       */
      if (buffer->used[start + i])
         *use_render_init = false;

      /* Don't use render init for future attachments allocated to the same
       * registers.
       */
      buffer->used[start + i] = true;
   }

   if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
      *use_render_init = false;

   if (*use_render_init) {
      struct pvr_renderpass_colorinit *new_color_init;
      struct usc_mrt_resource *new_mrt;

      /* Initialize the storage at the start of the render. */
      new_color_init = vk_realloc(ctx->allocator,
                                  hw_render->color_init,
                                  sizeof(hw_render->color_init[0U]) *
                                     (hw_render->color_init_count + 1U),
                                  8U,
                                  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_color_init)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->color_init = new_color_init;
      hw_render->color_init[hw_render->color_init_count].index =
         attachment->index;
      hw_render->color_init[hw_render->color_init_count].op = load_op;

      /* Set the destination for the attachment load/clear. */
      assert(hw_render->init_setup.num_render_targets ==
             hw_render->color_init_count);

      new_mrt = vk_realloc(ctx->allocator,
                           hw_render->init_setup.mrt_resources,
                           sizeof(hw_render->init_setup.mrt_resources[0U]) *
                              (hw_render->init_setup.num_render_targets + 1U),
                           8U,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_render->init_setup.mrt_resources = new_mrt;
      hw_render->init_setup
         .mrt_resources[hw_render->init_setup.num_render_targets] = *resource;
      hw_render->init_setup.num_render_targets++;

      hw_render->color_init_count++;
   }

   return VK_SUCCESS;
}

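/**
 * For each attachment first used in the current render: decide whether its
 * load or clear can be done at the render level rather than the subpass
 * level, and record the chosen initialization ops in the output structure.
 */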
static VkResult
pvr_subpass_setup_render_init(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_storage_firstuse first_use = { 0 };
   bool first_ds = true;
   VkResult result;

   if (ctx->alloc.tile_buffers_count > 0U) {
      first_use.tile_buffers = vk_zalloc(ctx->allocator,
                                         sizeof(first_use.tile_buffers[0U]) *
                                            ctx->alloc.tile_buffers_count,
                                         8,
                                         VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!first_use.tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_hwsetup_subpass *hw_subpass =
         &hw_render->subpasses[i];
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If this is the first depth attachment in the render then clear at the
       * render level, not the subpass level.
       */
      if (first_ds &&
          (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR ||
           hw_subpass->stencil_clear)) {
         struct pvr_render_int_attachment *int_ds_attach;

         assert(input_subpass->depth_stencil_attachment !=
                VK_ATTACHMENT_UNUSED);
         assert(input_subpass->depth_stencil_attachment <
                ctx->pass->attachment_count);
         int_ds_attach =
            &ctx->int_attach[input_subpass->depth_stencil_attachment];

         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx == int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;

         if (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)
            hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_CLEAR;

         if (hw_subpass->stencil_clear) {
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_CLEAR;
            hw_subpass->stencil_clear = false;
         }
      }

      if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
         first_ds = false;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         struct usc_mrt_resource *mrt = &hw_subpass->setup.mrt_resources[j];
         const uint32_t attach_idx = input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;

         if (attach_idx == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[attach_idx];

         assert(pvr_get_accum_format_bitsize(
                   int_attach->attachment->vk_format) > 0U);

         /* Is this the first use of the attachment? */
         if (int_attach->first_use == (int32_t)i) {
            /* Set if we should initialize the attachment storage at the
             * render level.
             */
            bool use_render_init;
            result = pvr_surface_setup_render_init(ctx,
                                                   &first_use,
                                                   mrt,
                                                   int_attach->attachment,
                                                   hw_subpass->color_initops[j],
                                                   &use_render_init);
            if (result != VK_SUCCESS) {
               vk_free(ctx->allocator, first_use.tile_buffers);
               return result;
            }

            /* On success don't initialize the attachment at the subpass level.
             */
            if (use_render_init)
               hw_subpass->color_initops[j] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
         } else {
            /* This attachment is already present in on-chip storage so don't
             * do anything.
             */
            assert(hw_subpass->color_initops[j] ==
                   VK_ATTACHMENT_LOAD_OP_DONT_CARE);
         }
      }
   }

   vk_free(ctx->allocator, first_use.tile_buffers);

   return VK_SUCCESS;
}

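/** Mark a range of dwords in an output register/tile buffer as allocated. */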
static void
pvr_mark_storage_allocated_in_buffer(struct pvr_renderpass_alloc_buffer *buffer,
                                     uint32_t start,
                                     uint32_t pixel_size)
{
   assert(!BITSET_TEST_RANGE(buffer->allocs, start, start + pixel_size - 1U));
   BITSET_SET_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}

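/**
 * Mark the storage location described by resource as allocated, growing the
 * array of tile buffers if required.
 */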
static VkResult
pvr_mark_storage_allocated(struct pvr_renderpass_context *ctx,
                           struct pvr_renderpass_alloc *alloc,
                           struct pvr_render_pass_attachment *attachment,
                           struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);

   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      /* Update the locations used in the pixel output registers. */
      pvr_mark_storage_allocated_in_buffer(&alloc->output_reg,
                                           resource->reg.output_reg,
                                           pixel_size);

      /* Update the range of pixel output registers used. */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->reg.output_reg + pixel_size);
   } else {
      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      if (resource->mem.tile_buffer >= alloc->tile_buffers_count) {
         /* Grow the number of tile buffers. */
         struct pvr_renderpass_alloc_buffer *new_tile_buffers = vk_realloc(
            ctx->allocator,
            alloc->tile_buffers,
            sizeof(alloc->tile_buffers[0U]) * (resource->mem.tile_buffer + 1U),
            8U,
            VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!new_tile_buffers)
            return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

         alloc->tile_buffers = new_tile_buffers;
         memset(
            &alloc->tile_buffers[alloc->tile_buffers_count],
            0U,
            sizeof(alloc->tile_buffers[0U]) *
               (resource->mem.tile_buffer + 1U - alloc->tile_buffers_count));
         alloc->tile_buffers_count = resource->mem.tile_buffer + 1U;
         assert(alloc->tile_buffers_count <= ctx->pass->max_tilebuffer_count);
      }

      /* Update the locations used in the tile buffer. */
      pvr_mark_storage_allocated_in_buffer(
         &alloc->tile_buffers[resource->mem.tile_buffer],
         resource->mem.offset_dw,
         pixel_size);

      /* The hardware makes the bit depth of the on-chip storage and memory
       * storage the same so make sure the memory storage is large enough to
       * accommodate the largest render target.
       */
      alloc->output_regs_count =
         MAX2(alloc->output_regs_count, resource->mem.offset_dw + pixel_size);
   }

   return VK_SUCCESS;
}

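/**
 * Allocate on-chip color storage (either pixel output registers or a tile
 * buffer location) for an attachment.
 */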
static VkResult
pvr_surface_alloc_color_storage(const struct pvr_device_info *dev_info,
                                struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *alloc,
                                struct pvr_render_pass_attachment *attachment,
                                struct pvr_renderpass_resource *resource)
{
   /* Number of dwords to allocate for the attachment. */
   const uint32_t pixel_size =
      DIV_ROUND_UP(pvr_get_accum_format_bitsize(attachment->vk_format), 32U);

   /* Try allocating pixel output registers. */
   const int32_t output_reg =
      pvr_is_space_in_buffer(dev_info, &alloc->output_reg, pixel_size);
   if (output_reg != -1) {
      resource->type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
      resource->reg.output_reg = (uint32_t)output_reg;
      resource->reg.offset = 0U;
   } else {
      uint32_t i;

      /* Mark the attachment as using a tile buffer. */
      resource->type = USC_MRT_RESOURCE_TYPE_MEMORY;

      /* Try allocating from an existing tile buffer. */
      for (i = 0U; i < alloc->tile_buffers_count; i++) {
         const int32_t tile_buffer_offset =
            pvr_is_space_in_buffer(dev_info,
                                   &alloc->tile_buffers[i],
                                   pixel_size);

         if (tile_buffer_offset != -1) {
            resource->mem.tile_buffer = i;
            resource->mem.offset_dw = (uint32_t)tile_buffer_offset;
            break;
         }
      }

      if (i == alloc->tile_buffers_count) {
         /* Check for reaching the maximum number of tile buffers. */
         if (alloc->tile_buffers_count == ctx->pass->max_tilebuffer_count)
            return vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);

         /* Use a newly allocated tile buffer. */
         resource->mem.tile_buffer = i;
         resource->mem.offset_dw = 0U;
      }
   }

   /* Update which parts of the pixel outputs/tile buffers are used. */
   return pvr_mark_storage_allocated(ctx, alloc, attachment, resource);
}

/** Free the storage allocated to an attachment. */
static void
pvr_free_buffer_storage(struct pvr_renderpass_alloc_buffer *buffer,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t start)
{
   const uint32_t pixel_size = DIV_ROUND_UP(
      pvr_get_accum_format_bitsize(int_attach->attachment->vk_format),
      32U);

   BITSET_CLEAR_RANGE(buffer->allocs, start, start + pixel_size - 1U);
}

/** Free the storage allocated to an attachment. */
static void
pvr_free_surface_storage(struct pvr_renderpass_context *ctx,
                         struct pvr_render_int_attachment *int_attach)
{
   struct usc_mrt_resource *resource = &int_attach->resource;
   struct pvr_renderpass_alloc *alloc = &ctx->alloc;

   assert(resource->type != USC_MRT_RESOURCE_TYPE_INVALID);

   /* Mark the storage as free. */
   if (resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
      pvr_free_buffer_storage(&alloc->output_reg,
                              int_attach,
                              resource->reg.output_reg);
   } else {
      struct pvr_renderpass_alloc_buffer *tile_buffer;

      assert(resource->type == USC_MRT_RESOURCE_TYPE_MEMORY);

      assert(resource->mem.tile_buffer < alloc->tile_buffers_count);
      tile_buffer = &alloc->tile_buffers[resource->mem.tile_buffer];
      pvr_free_buffer_storage(tile_buffer, int_attach, resource->mem.offset_dw);
   }

   /* Mark that the attachment doesn't have allocated storage. */
   resource->type = USC_MRT_RESOURCE_TYPE_INVALID;

   /* Remove from the list of surfaces with allocated on-chip storage. */
   assert(ctx->active_surfaces > 0U);
   ctx->active_surfaces--;
   list_del(&int_attach->link);
}

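/** Reset the usage tracking for an attachment and free its on-chip storage. */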
static void pvr_reset_surface(struct pvr_renderpass_context *ctx,
                              struct pvr_render_int_attachment *int_attach)
{
   /* Reset information about the range of uses. */
   int_attach->first_use = int_attach->last_use = -1;
   int_attach->z_replicate = false;

   pvr_free_surface_storage(ctx, int_attach);
}

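/**
 * Record that an attachment has on-chip storage, starting from the given
 * subpass.
 */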
static void
pvr_make_surface_active(struct pvr_renderpass_context *ctx,
                        struct pvr_render_int_attachment *int_attach,
                        uint32_t subpass_num)
{
   /* Add to the list of surfaces with on-chip storage. */
   assert(int_attach->first_use == -1);
   int_attach->first_use = subpass_num;
   ctx->active_surfaces++;
   list_addtail(&int_attach->link, &ctx->active_surf_list);
}

/**
 * For a subpass copy details of storage locations for the input/color to the
 * output structure.
 */
static VkResult
pvr_copy_storage_details(struct pvr_renderpass_context *ctx,
                         struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                         struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   const uint32_t max_rts =
      input_subpass->color_count + input_subpass->input_count;
   VkResult result;

   if (max_rts == 0)
      return VK_SUCCESS;

   hw_subpass->setup.mrt_resources =
      vk_zalloc(ctx->allocator,
                sizeof(hw_subpass->setup.mrt_resources[0U]) * max_rts,
                8,
                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->setup.mrt_resources) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      /* Record for the subpass where the color attachment is stored. */
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      hw_subpass->setup.mrt_resources[i] = int_attach->resource;
   }

   hw_subpass->setup.num_render_targets = input_subpass->color_count;

   if (input_subpass->input_count == 0)
      return VK_SUCCESS;

   /* For this subpass's input attachments. */
   hw_subpass->input_access = vk_alloc(ctx->allocator,
                                       sizeof(hw_subpass->input_access[0U]) *
                                          input_subpass->input_count,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->input_access) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_copy_storage_details;
   }

   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID) {
         bool is_color = false;

         /* Access the input attachment from on-chip storage. */
         if (int_attach->z_replicate) {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP_ZREPLICATE;
         } else {
            hw_subpass->input_access[i].type =
               PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_ONCHIP;
         }

         /* If this attachment is also a color attachment then point to the
          * color attachment's resource.
          */
         for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
            if (input_subpass->color_attachments[j] == (int32_t)attach_idx) {
               hw_subpass->input_access[i].on_chip_rt = j;
               is_color = true;
               break;
            }
         }

         if (!is_color) {
            const uint32_t num_rts = hw_subpass->setup.num_render_targets;

            hw_subpass->input_access[i].on_chip_rt = num_rts;
            hw_subpass->setup.num_render_targets++;

            /* Record the location of the storage for the attachment. */
            hw_subpass->setup.mrt_resources[num_rts] = int_attach->resource;
         }
      } else {
         /* Access the input attachment from memory. */
         hw_subpass->input_access[i].type =
            PVR_RENDERPASS_HWSETUP_INPUT_ACCESS_OFFCHIP;
         hw_subpass->input_access[i].on_chip_rt = -1;
      }
   }

   return VK_SUCCESS;

end_copy_storage_details:
   if (hw_subpass->input_access) {
      vk_free(ctx->allocator, hw_subpass->input_access);
      hw_subpass->input_access = NULL;
   }

   if (hw_subpass->setup.mrt_resources) {
      vk_free(ctx->allocator, hw_subpass->setup.mrt_resources);
      hw_subpass->setup.mrt_resources = NULL;
   }

   return result;
}

/**
 * For a subpass copy details of any storage location for a replicated version
 * of the depth attachment to the output structure.
 */
static VkResult
pvr_copy_z_replicate_details(struct pvr_renderpass_context *ctx,
                             struct pvr_renderpass_hwsetup_subpass *hw_subpass,
                             struct pvr_renderpass_subpass *subpass)
{
   struct pvr_render_subpass *input_subpass = subpass->input_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   uint32_t z_replicate;
   bool found = false;

   assert(input_subpass->depth_stencil_attachment >= 0U &&
          input_subpass->depth_stencil_attachment <
             (int32_t)ctx->pass->attachment_count);

   int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];

   assert(hw_subpass->z_replicate == -1);

   /* Is the replicated depth also an input attachment? */
   for (uint32_t i = 0U; i < input_subpass->input_count; i++) {
      const uint32_t attach_idx = input_subpass->input_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach == int_ds_attach) {
         z_replicate = hw_subpass->input_access[i].on_chip_rt;
         found = true;
         break;
      }
   }

   if (!found)
      z_replicate = hw_subpass->setup.num_render_targets;

   /* If the Z replicate attachment isn't also an input attachment then grow the
    * array of locations.
    */
   assert(z_replicate <= hw_subpass->setup.num_render_targets);
   if (z_replicate == hw_subpass->setup.num_render_targets) {
      struct usc_mrt_resource *mrt =
         vk_realloc(ctx->allocator,
                    hw_subpass->setup.mrt_resources,
                    sizeof(hw_subpass->setup.mrt_resources[0U]) *
                       (hw_subpass->setup.num_render_targets + 1U),
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!mrt)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      hw_subpass->setup.mrt_resources = mrt;
      hw_subpass->setup.num_render_targets++;
   }

   /* Copy the location of the Z replicate. */
   assert(int_ds_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
   hw_subpass->setup.mrt_resources[z_replicate] = int_ds_attach->resource;
   hw_subpass->z_replicate = z_replicate;

   return VK_SUCCESS;
}

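/**
 * Drop one pending use of an attachment; once no uses remain, record its last
 * use and free its on-chip storage.
 */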
static void pvr_dereference_surface(struct pvr_renderpass_context *ctx,
                                    int32_t attach_idx,
                                    uint32_t subpass_num)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   assert(int_attach->remaining_count > 0U);
   int_attach->remaining_count--;

   if (int_attach->remaining_count == 0U) {
      if (int_attach->first_use != -1)
         int_attach->last_use = subpass_num;

      if (int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID)
         pvr_free_surface_storage(ctx, int_attach);
   }

   if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      assert(int_attach->stencil_remaining_count > 0U);
      int_attach->stencil_remaining_count--;
   }
}

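/** Free the per-subpass allocation state for the current render. */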
static void pvr_free_render(struct pvr_renderpass_context *ctx)
{
   pvr_free_alloc(ctx, &ctx->alloc);

   if (ctx->subpasses) {
      for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++)
         pvr_free_alloc(ctx, &ctx->subpasses[i].alloc);

      vk_free(ctx->allocator, ctx->subpasses);
      ctx->subpasses = NULL;
   }
}

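/**
 * Check whether the current render produces results which must be preserved:
 * a cleared then stored depth/stencil or color surface, or an MSAA resolve.
 */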
static bool pvr_render_has_side_effects(struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_pass *pass = ctx->pass;

   if ((hw_render->depth_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->depth_store) ||
       (hw_render->stencil_init == VK_ATTACHMENT_LOAD_OP_CLEAR &&
        hw_render->stencil_store)) {
      return true;
   }

   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      const struct pvr_renderpass_hwsetup_eot_surface *eot_attach =
         &hw_render->eot_surfaces[i];
      const struct pvr_render_pass_attachment *attachment =
         &pass->attachments[eot_attach->attachment_idx];

      if (attachment->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR &&
          attachment->store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         return true;
      }

      if (eot_attach->need_resolve)
         return true;
   }

   return false;
}

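/**
 * Close the in-progress render: set up the depth/stencil stores, the end of
 * tile (EOT) surfaces and any resolves, then reset the context ready for the
 * next render.
 */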
static VkResult pvr_close_render(const struct pvr_device *device,
                                 struct pvr_renderpass_context *ctx)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_renderpass_hwsetup_eot_surface *eot_attach;
   struct usc_mrt_setup *eot_setup;
   int32_t mrt_idx;
   VkResult result;

   /* Render already closed. */
   if (!hw_render)
      return VK_SUCCESS;

   /* Setup render and allocate resources for color/depth loads and clears. */
   result = pvr_subpass_setup_render_init(ctx);
   if (result != VK_SUCCESS)
      return result;

   /* Reset surfaces whose last use was in the current render. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->last_use != -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_reset_surface(ctx, int_attach);
      }
   }

   /* Check if the depth attachment has uses in future subpasses. */
   if (ctx->int_ds_attach) {
      /* Store the depth to the attachment at the end of the render. */
      if (ctx->int_ds_attach->remaining_count > 0U)
         hw_render->depth_store = true;

      /* Store the stencil to the attachment at the end of the render. */
      if (ctx->int_ds_attach->stencil_remaining_count > 0U)
         hw_render->stencil_store = true;

      if (hw_render->depth_store || hw_render->stencil_store) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED ||
                hw_render->ds_attach_idx ==
                   ctx->int_ds_attach->attachment->index);
         hw_render->ds_attach_idx = ctx->int_ds_attach->attachment->index;

         /* Allocate memory for the attachment. */
         pvr_mark_surface_alloc(ctx, ctx->int_ds_attach);
      }

      /* Load the depth and stencil before the next use. */
      ctx->int_ds_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      ctx->int_ds_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   eot_setup = &hw_render->eot_setup;
   memset(eot_setup, 0U, sizeof(*eot_setup));

   /* Set the number of pixel output registers/tile buffers allocated for the
    * render and copy the information to all subpasses and the EOT program.
    */
   pvr_finalise_po_alloc(device, ctx);

   /* If any attachments are used with z replicate then they will be stored by
    * the ISP, so remove them from the list of attachments to store using the
    * PBE.
    */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      if (int_attach->z_replicate)
         pvr_reset_surface(ctx, int_attach);
   }

   /* Number of surfaces with allocated on-chip storage. */
   eot_setup->num_render_targets = ctx->active_surfaces;
   eot_setup->mrt_resources = vk_alloc(ctx->allocator,
                                       sizeof(eot_setup->mrt_resources[0U]) *
                                          eot_setup->num_render_targets,
                                       8,
                                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!eot_setup->mrt_resources)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Record the location of the on-chip storage. */
   mrt_idx = 0U;
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      assert(int_attach->remaining_count > 0U);
      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
         assert(int_attach->stencil_remaining_count > 0U);

      /* Copy the location of the source data for this attachment. */
      eot_setup->mrt_resources[mrt_idx] = int_attach->resource;

      assert(int_attach->mrt_idx == -1);
      int_attach->mrt_idx = mrt_idx;

      mrt_idx++;
   }
   assert(mrt_idx == (int32_t)eot_setup->num_render_targets);

   hw_render->eot_surface_count = 0U;
   hw_render->pbe_emits = 0U;

   /* Count the number of surfaces to store to at the end of the subpass. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         if (list_is_linked(&color_attach->link)) {
            uint32_t rem_count = resolve_output == VK_ATTACHMENT_UNUSED ? 0U
                                                                        : 1U;

            /* If a color attachment is resolved it will have an extra
             * remaining usage.
             */
            if (color_attach->remaining_count > rem_count &&
                !color_attach->eot_surf_required) {
               color_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *int_resolve_attach =
               &ctx->int_attach[resolve_output];

            if (!int_resolve_attach->eot_surf_required) {
               int_resolve_attach->eot_surf_required = true;
               hw_render->eot_surface_count++;
            }
         }
      }
   }

   assert(hw_render->eot_surface_count <= 16U);

   hw_render->eot_surfaces = vk_alloc(ctx->allocator,
                                      sizeof(hw_render->eot_surfaces[0U]) *
                                         hw_render->eot_surface_count,
                                      8,
                                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_render->eot_surfaces)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   eot_attach = hw_render->eot_surfaces;

   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t resolve_output =
            input_subpass->resolve_attachments
               ? input_subpass->resolve_attachments[j]
               : VK_ATTACHMENT_UNUSED;
         struct pvr_render_int_attachment *color_attach;

         if (input_subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
            continue;

         color_attach = &ctx->int_attach[input_subpass->color_attachments[j]];

         if (resolve_output != VK_ATTACHMENT_UNUSED) {
            struct pvr_render_int_attachment *resolve_src =
               &ctx->int_attach[input_subpass->color_attachments[j]];
            struct pvr_render_int_attachment *resolve_dst =
               &ctx->int_attach[resolve_output];

            assert(resolve_dst->eot_surf_required);
            resolve_dst->eot_surf_required = false;

            /* Dereference the source to the resolve. */
            assert(resolve_src->remaining_count > 0U);
            resolve_src->remaining_count--;

            /* Allocate device memory for the resolve destination. */
            pvr_mark_surface_alloc(ctx, resolve_dst);

            /* The attachment has been written so load the attachment the
             * next time it is referenced.
             */
            resolve_dst->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;

            eot_attach->mrt_idx = resolve_src->mrt_idx;
            eot_attach->attachment_idx = resolve_dst->attachment->index;
            eot_attach->src_attachment_idx = resolve_src->attachment->index;

            eot_attach->need_resolve = true;

            if (!resolve_src->is_pbe_downscalable) {
               /* Resolve src must be stored for transfer resolve. */
               assert(resolve_src->remaining_count > 0U);

               eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
            } else if (resolve_src->remaining_count == 0U) {
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
               hw_render->pbe_emits++;
            } else {
               eot_attach->resolve_type = PVR_RESOLVE_TYPE_INVALID;
            }

            eot_attach++;
         }

         if (color_attach->eot_surf_required) {
            assert(color_attach->remaining_count > 0U);

            pvr_mark_surface_alloc(ctx, color_attach);

            assert(color_attach->mrt_idx >= 0);
            assert(color_attach->mrt_idx <
                   (int32_t)hw_render->eot_setup.num_render_targets);

            eot_attach->mrt_idx = color_attach->mrt_idx;
            eot_attach->attachment_idx = color_attach->attachment->index;
            eot_attach->need_resolve = false;
            eot_attach++;

            hw_render->pbe_emits++;

            color_attach->eot_surf_required = false;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Count the number of extra resolves we can do through the PBE. */
   for (uint32_t i = 0U; i < hw_render->eot_surface_count; i++) {
      eot_attach = &hw_render->eot_surfaces[i];

      if (eot_attach->need_resolve &&
          eot_attach->resolve_type == PVR_RESOLVE_TYPE_INVALID) {
         if (hw_render->pbe_emits == PVR_NUM_PBE_EMIT_REGS) {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_TRANSFER;
         } else {
            eot_attach->resolve_type = PVR_RESOLVE_TYPE_PBE;
            hw_render->pbe_emits++;
         }
      }
   }

   assert(hw_render->pbe_emits <= PVR_NUM_PBE_EMIT_REGS);

   /* Check for side effects in the final render. */
   hw_render->has_side_effects = pvr_render_has_side_effects(ctx);

   /* Reset active surfaces. */
   list_for_each_entry_safe (struct pvr_render_int_attachment,
                             int_attach,
                             &ctx->active_surf_list,
                             link) {
      int_attach->mrt_idx = -1;
      pvr_reset_surface(ctx, int_attach);
   }

   assert(ctx->active_surfaces == 0U);
   assert(list_is_empty(&ctx->active_surf_list));

   pvr_free_render(ctx);
   pvr_reset_render(ctx);

   return VK_SUCCESS;
}

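/** Check whether an attachment is used as an input attachment in a subpass. */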
static bool pvr_is_input(struct pvr_render_subpass *subpass,
                         uint32_t attach_idx)
{
   if (attach_idx == VK_ATTACHMENT_UNUSED)
      return false;

   for (uint32_t i = 0U; i < subpass->input_count; i++) {
      if (subpass->input_attachments[i] == attach_idx)
         return true;
   }

   return false;
}

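/**
 * Check whether the incoming subpass's depth/stencil attachment conflicts
 * with the depth/stencil contents already on-chip for the current render.
 */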
static bool
pvr_depth_zls_conflict(struct pvr_renderpass_context *ctx,
                       struct pvr_render_int_attachment *int_ds_attach,
                       bool existing_ds_is_input)
{
   if (!ctx->int_ds_attach)
      return false;

   /* No conflict if the incoming subpass doesn't have a depth/stencil
    * attachment.
    */
   if (!int_ds_attach)
      return false;

   /* No conflict if the incoming depth/stencil attachment is the same as the
    * existing one.
    */
   if (ctx->int_ds_attach == int_ds_attach)
      return false;

   /* If the existing depth/stencil attachment is used later, then we can't
    * overwrite it.
    *
    * The exception is if the only use is as an input attachment in the incoming
    * subpass in which case we can use the Z replicate feature to save the
    * value.
    */
   if (ctx->int_ds_attach->remaining_count > 0U &&
       !(existing_ds_is_input && ctx->int_ds_attach->remaining_count == 1U)) {
      return true;
   }

   if (ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       ctx->int_ds_attach->stencil_remaining_count > 0U) {
      return true;
   }

   /* We can't load midrender so fail if the new depth/stencil attachment is
    * already initialized.
    */
   if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
      return true;

   if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
      return true;
   }

   return false;
}

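/** Copy the details of a storage location to an attachment. */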
static void
pvr_set_surface_resource(struct pvr_render_int_attachment *int_attach,
                         struct pvr_renderpass_resource *resource)
{
   int_attach->resource.type = resource->type;

   switch (resource->type) {
   case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
      int_attach->resource.reg.output_reg = resource->reg.output_reg;
      int_attach->resource.reg.offset = resource->reg.offset;
      break;

   case USC_MRT_RESOURCE_TYPE_MEMORY:
      int_attach->resource.mem.tile_buffer = resource->mem.tile_buffer;
      int_attach->resource.mem.offset_dw = resource->mem.offset_dw;
      break;

   default:
      break;
   }
}

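/** Check whether two storage locations are the same. */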
static bool pvr_equal_resources(struct pvr_renderpass_resource *resource1,
                                struct pvr_renderpass_resource *resource2)
{
   if (resource1->type != resource2->type)
      return false;

   switch (resource1->type) {
   case USC_MRT_RESOURCE_TYPE_OUTPUT_REG:
      return resource1->reg.output_reg == resource2->reg.output_reg &&
             resource1->reg.offset == resource2->reg.offset;

   case USC_MRT_RESOURCE_TYPE_MEMORY:
      return resource1->mem.tile_buffer == resource2->mem.tile_buffer &&
             resource1->mem.offset_dw == resource2->mem.offset_dw;

   default:
      return true;
   }
}

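/**
 * Enable Z replication for an attachment: from the first subpass which writes
 * it, the depth value is also written to color storage (replicate_dst) so
 * later subpasses can read it.
 */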
static VkResult
pvr_enable_z_replicate(struct pvr_renderpass_context *ctx,
                       struct pvr_renderpass_hwsetup_render *hw_render,
                       int32_t replicate_attach_idx,
                       struct pvr_renderpass_resource *replicate_dst)
{
   struct pvr_render_int_attachment *int_attach =
      &ctx->int_attach[replicate_attach_idx];
   int32_t first_use = -1;

   /* If Z replication was already enabled for the attachment then nothing more
    * to do.
    */
   if (!int_attach->z_replicate) {
      /* Copy details of the storage for the replicated value to the attachment.
       */
      assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
      assert(replicate_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
      pvr_set_surface_resource(int_attach, replicate_dst);
   } else {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      assert(replicate_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
   }

   /* Find the first subpass where the attachment is written. */
   for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      if (input_subpass->depth_stencil_attachment == replicate_attach_idx) {
         first_use = i;
         break;
      }
   }
   assert(first_use >= 0);

   /* For all subpasses from the first write. */
   for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
      struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
      struct pvr_render_subpass *input_subpass = subpass->input_subpass;

      /* If the subpass writes to the attachment then enable z replication. */
      if (input_subpass->depth_stencil_attachment == replicate_attach_idx &&
          !subpass->z_replicate) {
         subpass->z_replicate = true;

         if (i != (hw_render->subpass_count - 1U)) {
            /* Copy the details of the storage for replicated value. */
            const VkResult result =
               pvr_copy_z_replicate_details(ctx,
                                            &ctx->hw_render->subpasses[i],
                                            subpass);
            if (result != VK_SUCCESS)
               return result;
         }
      }
   }

   if (!int_attach->z_replicate) {
      /* Add the storage for the replicated value to locations in use at each
       * subpass.
       */
      for (uint32_t i = first_use; i < (hw_render->subpass_count - 1U); i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];

         const VkResult result =
            pvr_mark_storage_allocated(ctx,
                                       &subpass->alloc,
                                       int_attach->attachment,
                                       replicate_dst);
         if (result != VK_SUCCESS)
            return result;
      }

      /* Add the depth attachment to the list of surfaces with allocated
       * storage.
       */
      pvr_make_surface_active(ctx, int_attach, first_use);

      int_attach->z_replicate = true;
   }

   return VK_SUCCESS;
}

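/**
 * Check whether the attachment was the destination of an MSAA resolve in the
 * most recent hardware render.
 */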
static bool pvr_is_pending_resolve_dest(struct pvr_renderpass_context *ctx,
                                        uint32_t attach_idx)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   return int_attach->last_resolve_dst_render != -1 &&
          int_attach->last_resolve_dst_render ==
             (int32_t)(ctx->hw_setup->render_count - 1U);
}

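/**
 * Check whether the attachment was the source of an MSAA resolve in the most
 * recent hardware render.
 */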
static bool pvr_is_pending_resolve_src(struct pvr_renderpass_context *ctx,
                                       uint32_t attach_idx)
{
   struct pvr_render_int_attachment *int_attach = &ctx->int_attach[attach_idx];

   return int_attach->last_resolve_src_render != -1 &&
          int_attach->last_resolve_src_render ==
             (int32_t)(ctx->hw_setup->render_count - 1U);
}

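/**
 * Check whether adding the subpass's color outputs to the current render
 * would exceed the number of PBE emit registers available.
 */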
static bool pvr_exceeds_pbe_registers(struct pvr_renderpass_context *ctx,
                                      struct pvr_render_subpass *subpass)
{
   int32_t live_outputs[PVR_NUM_PBE_EMIT_REGS];
   uint32_t num_live_outputs = 0U;

   /* Count all color outputs so far. */
   for (uint32_t i = 0U; i < ctx->hw_render->subpass_count; i++) {
      struct pvr_render_subpass *input_subpass =
         ctx->subpasses[i].input_subpass;

      for (uint32_t j = 0U; j < input_subpass->color_count; j++) {
         const uint32_t global_color_attach =
            input_subpass->color_attachments[j];
         struct pvr_render_int_attachment *int_attach;
         bool found = false;

         if (global_color_attach == VK_ATTACHMENT_UNUSED)
            continue;

         int_attach = &ctx->int_attach[global_color_attach];

         if (int_attach->last_read <= (int32_t)subpass->index)
            continue;

         for (uint32_t k = 0U; k < num_live_outputs; k++) {
            if (live_outputs[k] == global_color_attach) {
               found = true;
               break;
            }
         }

         if (!found)
            live_outputs[num_live_outputs++] = global_color_attach;
      }
   }

   assert(num_live_outputs <= PVR_NUM_PBE_EMIT_REGS);

   /* Check if adding all the color outputs of the new subpass to the render
    * would exceed the limit.
    */
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t global_color_attach = subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;
      bool found = false;

      if (global_color_attach == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[global_color_attach];

      if (int_attach->last_read <= (int32_t)subpass->index)
         continue;

      for (uint32_t j = 0U; j < num_live_outputs; j++) {
         if (live_outputs[j] == global_color_attach) {
            found = true;
            break;
         }
      }

      if (!found) {
         if (num_live_outputs >= PVR_NUM_PBE_EMIT_REGS)
            return true;

         live_outputs[num_live_outputs++] = global_color_attach;
      }
   }

   return false;
}

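/* Merge two allocation bitmaps: a location is marked as in use in dst if it
 * is in use in either dst or src.
 */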
static void pvr_merge_alloc_buffer(struct pvr_renderpass_alloc_buffer *dst,
                                   struct pvr_renderpass_alloc_buffer *src)
{
   for (uint32_t i = 0U; i < ARRAY_SIZE(dst->allocs); i++)
      dst->allocs[i] |= src->allocs[i];
}

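/* Merge the storage allocation state src into dst, growing dst's array of
 * tile buffers if src uses more of them.
 */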
static VkResult pvr_merge_alloc(struct pvr_renderpass_context *ctx,
                                struct pvr_renderpass_alloc *dst,
                                struct pvr_renderpass_alloc *src)
{
   pvr_merge_alloc_buffer(&dst->output_reg, &src->output_reg);

   dst->output_regs_count =
      MAX2(dst->output_regs_count, src->output_regs_count);

   if (dst->tile_buffers_count < src->tile_buffers_count) {
      struct pvr_renderpass_alloc_buffer *new_tile_buffers =
         vk_realloc(ctx->allocator,
                    dst->tile_buffers,
                    sizeof(dst->tile_buffers[0U]) * src->tile_buffers_count,
                    8U,
                    VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!new_tile_buffers)
         return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

      dst->tile_buffers = new_tile_buffers;
      memset(dst->tile_buffers + dst->tile_buffers_count,
             0U,
             sizeof(dst->tile_buffers[0U]) *
                (src->tile_buffers_count - dst->tile_buffers_count));
      dst->tile_buffers_count = src->tile_buffers_count;
   }

   for (uint32_t i = 0U; i < src->tile_buffers_count; i++)
      pvr_merge_alloc_buffer(&dst->tile_buffers[i], &src->tile_buffers[i]);

   return VK_SUCCESS;
}

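/* Check whether on-chip storage can be found for a replicated depth value,
 * taking into account the locations in use at every subpass from the first
 * write of the depth attachment onwards. On success the chosen location is
 * returned in resource and marked as allocated in alloc.
 */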
static VkResult
pvr_is_z_replicate_space_available(const struct pvr_device_info *dev_info,
                                   struct pvr_renderpass_context *ctx,
                                   struct pvr_renderpass_alloc *alloc,
                                   uint32_t attach_idx,
                                   struct pvr_renderpass_resource *resource)
{
   struct pvr_renderpass_hwsetup_render *hw_render = ctx->hw_render;
   struct pvr_render_int_attachment *int_attach;
   struct pvr_renderpass_alloc combined_alloc;
   uint32_t first_use;
   VkResult result;

   /* If z replication was already enabled by a previous subpass then storage
    * will already be allocated.
    */
   assert(attach_idx < ctx->pass->attachment_count);

   int_attach = &ctx->int_attach[attach_idx];
   if (int_attach->z_replicate) {
      assert(int_attach->resource.type != USC_MRT_RESOURCE_TYPE_INVALID);
      return VK_SUCCESS;
   }

   /* Get the registers used in any subpass after the depth is first written.
    * Start with registers used in the incoming subpass.
    */
   result = pvr_copy_alloc(ctx, &combined_alloc, alloc);
   if (result != VK_SUCCESS)
      return result;

   if (hw_render) {
      /* Find the subpass where the depth is first written. */
      first_use = hw_render->subpass_count;
      for (uint32_t i = 0U; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];
         struct pvr_render_subpass *input_subpass = subpass->input_subpass;

         if (input_subpass->depth_stencil_attachment == (int32_t)attach_idx) {
            first_use = i;
            break;
         }
      }

      /* Merge in registers used in previous subpasses. */
      for (uint32_t i = first_use; i < hw_render->subpass_count; i++) {
         struct pvr_renderpass_subpass *subpass = &ctx->subpasses[i];

         result = pvr_merge_alloc(ctx, &combined_alloc, &subpass->alloc);
         if (result != VK_SUCCESS) {
            pvr_free_alloc(ctx, &combined_alloc);
            return result;
         }
      }
   }

   result = pvr_surface_alloc_color_storage(dev_info,
                                            ctx,
                                            &combined_alloc,
                                            int_attach->attachment,
                                            resource);

   pvr_free_alloc(ctx, &combined_alloc);
   if (result != VK_SUCCESS)
      return result;

   return pvr_mark_storage_allocated(ctx,
                                     alloc,
                                     int_attach->attachment,
                                     resource);
}

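/* Check whether storage can be allocated for the results of a new subpass:
 * its color attachments and any depth replication required for input
 * attachments. On success the updated allocation state is returned in alloc
 * and the chosen locations in sp_dsts.
 */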
static VkResult
pvr_is_subpass_space_available(const struct pvr_device_info *dev_info,
                               struct pvr_renderpass_context *ctx,
                               struct pvr_render_subpass *subpass,
                               struct pvr_render_subpass_depth_params *sp_depth,
                               struct pvr_renderpass_alloc *alloc,
                               struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;

   /* Mark pointers in return structures as not allocated. */
   sp_dsts->color = NULL;
   alloc->tile_buffers = NULL;

   /* Take a copy of which locations are in use after this subpass. */
   result = pvr_copy_alloc(ctx, alloc, &ctx->alloc);
   if (result != VK_SUCCESS)
      return result;

   /* Allocate space to store our results. */
   if (subpass->color_count > 0U) {
      sp_dsts->color =
         vk_alloc(ctx->allocator,
                  sizeof(sp_dsts->color[0U]) * subpass->color_count,
                  8,
                  VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
      if (!sp_dsts->color) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto err_free_alloc;
      }
   } else {
      sp_dsts->color = NULL;
   }

   sp_dsts->existing_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;
   sp_dsts->incoming_zrep.type = USC_MRT_RESOURCE_TYPE_INVALID;

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t attach_idx = subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      assert(pvr_get_accum_format_bitsize(int_attach->attachment->vk_format) >
             0U);

      /* Has the attachment not been allocated on-chip storage yet? */
      if (int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID) {
         result = pvr_surface_alloc_color_storage(dev_info,
                                                  ctx,
                                                  alloc,
                                                  int_attach->attachment,
                                                  &sp_dsts->color[i]);
         if (result != VK_SUCCESS)
            goto err_free_alloc;

         /* Avoid merging subpasses which result in tile buffers having to be
          * used. The benefit of merging must be weighed against the cost of
          * writing/reading to tile buffers.
          */
         if (ctx->hw_render &&
             sp_dsts->color[i].type != USC_MRT_RESOURCE_TYPE_OUTPUT_REG) {
            result = vk_error(NULL, VK_ERROR_TOO_MANY_OBJECTS);
            goto err_free_alloc;
         }
      } else {
         sp_dsts->color[i].type = USC_MRT_RESOURCE_TYPE_INVALID;
      }
   }

   if (sp_depth->existing_ds_is_input) {
      result = pvr_is_z_replicate_space_available(dev_info,
                                                  ctx,
                                                  alloc,
                                                  sp_depth->existing_ds_attach,
                                                  &sp_dsts->existing_zrep);
      if (result != VK_SUCCESS)
         goto err_free_alloc;
   }

   if (sp_depth->incoming_ds_is_input) {
      if (sp_depth->existing_ds_attach != subpass->depth_stencil_attachment) {
         result = pvr_is_z_replicate_space_available(
            dev_info,
            ctx,
            alloc,
            subpass->depth_stencil_attachment,
            &sp_dsts->incoming_zrep);
         if (result != VK_SUCCESS)
            goto err_free_alloc;
      } else {
         sp_dsts->incoming_zrep = sp_dsts->existing_zrep;
      }
   }

   return VK_SUCCESS;

err_free_alloc:
   pvr_free_alloc(ctx, alloc);
   if (sp_dsts->color)
      vk_free(ctx->allocator, sp_dsts->color);

   sp_dsts->color = NULL;

   return result;
}

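/* Check whether a subpass can be merged into the render currently being
 * built, i.e. whether nothing forces a render split and storage can be found
 * for the subpass's outputs.
 */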
static bool
pvr_can_combine_with_render(const struct pvr_device_info *dev_info,
                            struct pvr_renderpass_context *ctx,
                            struct pvr_render_subpass *subpass,
                            struct pvr_render_subpass_depth_params *sp_depth,
                            struct pvr_render_int_attachment *int_ds_attach,
                            struct pvr_renderpass_alloc *new_alloc,
                            struct pvr_render_int_subpass_dsts *sp_dsts)
{
   VkResult result;
   bool ret;

   /* Mark pointers in return structures as not allocated. */
   sp_dsts->color = NULL;
   new_alloc->tile_buffers = NULL;

   /* The hardware doesn't support replicating the stencil, so we need to
    * store the depth to memory if a stencil attachment is used as an input
    * attachment.
    */
   if (sp_depth->existing_ds_is_input &&
       ctx->int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      return false;
   }

   if (sp_depth->incoming_ds_is_input && int_ds_attach &&
       int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT &&
       ctx->hw_render) {
      return false;
   }

   /* Can't mix multiple sample counts in the same render. */
   if (ctx->hw_render &&
       ctx->hw_render->sample_count != subpass->sample_count) {
      return false;
   }

   /* If the depth is used by both the render and the incoming subpass and
    * either the existing depth must be saved or the new depth must be loaded
    * then we can't merge.
    */
   ret = pvr_depth_zls_conflict(ctx,
                                int_ds_attach,
                                sp_depth->existing_ds_is_input);
   if (ret)
      return false;

   /* Check if any of the subpass's dependencies are marked such that the two
    * subpasses can't be in the same render.
    */
   for (uint32_t i = 0U; i < subpass->dep_count; i++) {
      const uint32_t dep = subpass->dep_list[i];
      if (subpass->flush_on_dep[i] && ctx->hw_setup->subpass_map[dep].render ==
                                         (ctx->hw_setup->render_count - 1U)) {
         return false;
      }
   }

   /* Check if one of the input/color attachments is written by an MSAA
    * resolve in an existing subpass in the current render.
    */
   for (uint32_t i = 0U; i < subpass->input_count; i++) {
      const uint32_t attach_idx = subpass->input_attachments[i];
      if (attach_idx != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, attach_idx)) {
         return false;
      }
   }

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED &&
          (pvr_is_pending_resolve_dest(ctx, subpass->color_attachments[i]) ||
           pvr_is_pending_resolve_src(ctx, subpass->color_attachments[i]))) {
         return false;
      }

      if (subpass->resolve_attachments &&
          subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED &&
          pvr_is_pending_resolve_dest(ctx, subpass->resolve_attachments[i])) {
         return false;
      }
   }

   /* A single subpass can't exceed the PBE register limit on its own, so only
    * check when merging into an existing render.
    */
   if (ctx->hw_render) {
      ret = pvr_exceeds_pbe_registers(ctx, subpass);
      if (ret)
         return false;
   }

   /* Check we can allocate storage for the new subpass's color attachments
    * and any z replications.
    */
   result = pvr_is_subpass_space_available(dev_info,
                                           ctx,
                                           subpass,
                                           sp_depth,
                                           new_alloc,
                                           sp_dsts);
   if (result != VK_SUCCESS)
      return false;

   return true;
}

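/* Add a subpass to the render currently being built, closing the current
 * render and starting a new one first if the subpass can't be merged into
 * it.
 */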
static VkResult
pvr_merge_subpass(const struct pvr_device *device,
                  struct pvr_renderpass_context *ctx,
                  struct pvr_render_subpass *input_subpass,
                  struct pvr_renderpass_hwsetup_subpass **const hw_subpass_out)
{
   struct pvr_renderpass_hwsetup_subpass *new_hw_subpasses;
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_render_int_attachment *int_ds_attach;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_subpass_depth_params sp_depth;
   struct pvr_renderpass_subpass *new_subpasses;
   struct pvr_render_int_subpass_dsts sp_dsts;
   struct pvr_renderpass_subpass *subpass;
   struct pvr_renderpass_alloc alloc;
   VkResult result;
   bool ret;

   /* Depth attachment for the incoming subpass. */
   if (input_subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED)
      int_ds_attach = &ctx->int_attach[input_subpass->depth_stencil_attachment];
   else
      int_ds_attach = NULL;

   /* Attachment ID for the existing depth attachment. */
   if (ctx->int_ds_attach)
      sp_depth.existing_ds_attach = ctx->int_ds_attach - ctx->int_attach;
   else
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

   /* Is the incoming depth attachment used as an input to the incoming
    * subpass?
    */
   sp_depth.incoming_ds_is_input =
      pvr_is_input(input_subpass, input_subpass->depth_stencil_attachment);

   /* Is the current depth attachment used as an input to the incoming
    * subpass?
    */
   sp_depth.existing_ds_is_input =
      pvr_is_input(input_subpass, sp_depth.existing_ds_attach);

   /* Can the incoming subpass be combined with the existing render? Also
    * checks if space is available for the subpass results and returns the
    * allocated locations.
    */
   ret = pvr_can_combine_with_render(&device->pdevice->dev_info,
                                     ctx,
                                     input_subpass,
                                     &sp_depth,
                                     int_ds_attach,
                                     &alloc,
                                     &sp_dsts);
   if (!ret) {
      result = pvr_close_render(device, ctx);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;

      sp_depth.existing_ds_is_input = false;
      sp_depth.existing_ds_attach = VK_ATTACHMENT_UNUSED;

      /* Allocate again in a new render. */
      result = pvr_is_subpass_space_available(&device->pdevice->dev_info,
                                              ctx,
                                              input_subpass,
                                              &sp_depth,
                                              &alloc,
                                              &sp_dsts);
      assert(result != VK_ERROR_TOO_MANY_OBJECTS);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* If there isn't an in-progress render then allocate one. */
   if (!ctx->hw_render) {
      struct pvr_renderpass_hwsetup *hw_setup = ctx->hw_setup;
      struct pvr_renderpass_hwsetup_render *new_hw_render = vk_realloc(
         ctx->allocator,
         hw_setup->renders,
         sizeof(hw_setup->renders[0U]) * (hw_setup->render_count + 1U),
         8U,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (!new_hw_render) {
         result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
         goto end_merge_subpass;
      }

      hw_setup->renders = new_hw_render;

      ctx->hw_render = &hw_setup->renders[hw_setup->render_count];
      memset(ctx->hw_render, 0U, sizeof(*hw_render));
      ctx->hw_render->ds_attach_idx = VK_ATTACHMENT_UNUSED;
      hw_setup->render_count++;
      ctx->hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      ctx->hw_render->sample_count = input_subpass->sample_count;
   }

   /* Allocate a new subpass in the in-progress render. */
   hw_render = ctx->hw_render;

   new_hw_subpasses = vk_realloc(ctx->allocator,
                                 hw_render->subpasses,
                                 sizeof(hw_render->subpasses[0U]) *
                                    (hw_render->subpass_count + 1U),
                                 8U,
                                 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!new_hw_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   hw_render->subpasses = new_hw_subpasses;
   hw_subpass = &hw_render->subpasses[hw_render->subpass_count];

   new_subpasses =
      vk_realloc(ctx->allocator,
                 ctx->subpasses,
                 sizeof(ctx->subpasses[0U]) * (hw_render->subpass_count + 1U),
                 8U,
                 VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!new_subpasses) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   ctx->subpasses = new_subpasses;

   subpass = &ctx->subpasses[hw_render->subpass_count];
   subpass->input_subpass = input_subpass;
   subpass->z_replicate = false;

   /* Save the allocation state at the subpass. */
   result = pvr_copy_alloc(ctx, &subpass->alloc, &alloc);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   hw_render->subpass_count++;

   memset(hw_subpass, 0U, sizeof(*hw_subpass));
   hw_subpass->index = input_subpass->index;
   hw_subpass->z_replicate = -1;
   hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_DONT_CARE;

   if (int_ds_attach && ctx->int_ds_attach != int_ds_attach) {
      bool setup_render_ds = false;
      bool stencil_load = false;
      bool depth_load = false;

      if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
         depth_load = true;
         setup_render_ds = true;
         hw_render->depth_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_LOAD;

         assert(!ctx->ds_load_surface);
         ctx->ds_load_surface = int_ds_attach;
      } else if (int_ds_attach->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         hw_subpass->depth_initop = VK_ATTACHMENT_LOAD_OP_CLEAR;
      }

      if (int_ds_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         if (int_ds_attach->stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD) {
            stencil_load = true;
            setup_render_ds = true;
            hw_render->stencil_init = VK_ATTACHMENT_LOAD_OP_LOAD;
         } else if (int_ds_attach->stencil_load_op ==
                    VK_ATTACHMENT_LOAD_OP_CLEAR) {
            hw_subpass->stencil_clear = true;
         }
      }

      /* If the depth is loaded then allocate external memory for the depth
       * attachment.
       */
      if (depth_load || stencil_load)
         pvr_mark_surface_alloc(ctx, int_ds_attach);

      if (setup_render_ds) {
         assert(hw_render->ds_attach_idx == VK_ATTACHMENT_UNUSED);
         hw_render->ds_attach_idx = int_ds_attach->attachment->index;
      }

      ctx->int_ds_attach = int_ds_attach;
   }

   /* Set up the initialization operations for subpasses. */
   hw_subpass->color_initops = vk_alloc(ctx->allocator,
                                        sizeof(hw_subpass->color_initops[0U]) *
                                           input_subpass->color_count,
                                        8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!hw_subpass->color_initops) {
      result = vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto end_merge_subpass;
   }

   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];

      if (int_attach->first_use == -1) {
         hw_subpass->color_initops[i] = int_attach->load_op;

         /* If the attachment is loaded then off-chip memory must be
          * allocated for it.
          */
         if (int_attach->load_op == VK_ATTACHMENT_LOAD_OP_LOAD)
            pvr_mark_surface_alloc(ctx, int_attach);

         /* The attachment has been written so load the attachment the next
          * time it is referenced.
          */
         int_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      } else {
         hw_subpass->color_initops[i] = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
      }
   }

   /* Copy the destinations allocated for the color attachments. */
   for (uint32_t i = 0U; i < input_subpass->color_count; i++) {
      const uint32_t attach_idx = input_subpass->color_attachments[i];
      struct pvr_render_int_attachment *int_attach;
      struct pvr_renderpass_resource *attach_dst;

      if (attach_idx == VK_ATTACHMENT_UNUSED)
         continue;

      int_attach = &ctx->int_attach[attach_idx];
      attach_dst = &sp_dsts.color[i];

      if (int_attach->first_use == -1) {
         assert(int_attach->resource.type == USC_MRT_RESOURCE_TYPE_INVALID);
         assert(attach_dst->type != USC_MRT_RESOURCE_TYPE_INVALID);
         pvr_set_surface_resource(int_attach, attach_dst);

         /* If this attachment is being used for the first time then add it
          * to the active list.
          */
         pvr_make_surface_active(ctx,
                                 int_attach,
                                 hw_render->subpass_count - 1U);
      } else {
         assert(attach_dst->type == USC_MRT_RESOURCE_TYPE_INVALID);
      }
   }

   /* We can't directly read the on-chip depth so mark subpasses where the
    * depth is written to replicate the value into part of the color storage.
    */
   if (sp_depth.existing_ds_is_input) {
      result = pvr_enable_z_replicate(ctx,
                                      hw_render,
                                      sp_depth.existing_ds_attach,
                                      &sp_dsts.existing_zrep);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   if (sp_depth.incoming_ds_is_input) {
      if (input_subpass->depth_stencil_attachment !=
          sp_depth.existing_ds_attach) {
         result =
            pvr_enable_z_replicate(ctx,
                                   hw_render,
                                   input_subpass->depth_stencil_attachment,
                                   &sp_dsts.incoming_zrep);
         if (result != VK_SUCCESS)
            goto end_merge_subpass;
      } else {
         assert(pvr_equal_resources(&sp_dsts.existing_zrep,
                                    &sp_dsts.incoming_zrep));
      }
   }

   /* Copy the locations of color/input attachments to the output structure.
    * N.B. This must be done after z replication in case the replicated depth
    * is an input attachment for the incoming subpass.
    */
   result = pvr_copy_storage_details(ctx, hw_subpass, subpass);
   if (result != VK_SUCCESS)
      goto end_merge_subpass;

   if (subpass->z_replicate) {
      result = pvr_copy_z_replicate_details(ctx, hw_subpass, subpass);
      if (result != VK_SUCCESS)
         goto end_merge_subpass;
   }

   /* Copy the allocation at the subpass. This will then be updated if this
    * was the last use of any attachment.
    */
   pvr_free_alloc(ctx, &ctx->alloc);
   ctx->alloc = alloc;

   /* Free the information about subpass destinations. */
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   *hw_subpass_out = hw_subpass;

   return VK_SUCCESS;

end_merge_subpass:
   if (sp_dsts.color)
      vk_free(ctx->allocator, sp_dsts.color);

   pvr_free_alloc(ctx, &alloc);

   return result;
}

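/* Drop one reference to each attachment in the subpass's color output list
 * now that the subpass has been scheduled.
 */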
static void
pvr_dereference_color_output_list(struct pvr_renderpass_context *ctx,
                                  uint32_t subpass_num,
                                  struct pvr_render_subpass *subpass)
{
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      const uint32_t attach_idx = subpass->color_attachments[i];

      if (attach_idx != VK_ATTACHMENT_UNUSED)
         pvr_dereference_surface(ctx, attach_idx, subpass_num);
   }
}

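/* Drop one reference to each attachment in a list of attachment indices. */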
static void pvr_dereference_surface_list(struct pvr_renderpass_context *ctx,
                                         uint32_t subpass_num,
                                         uint32_t *attachments,
                                         uint32_t count)
{
   for (uint32_t i = 0U; i < count; i++) {
      if (attachments[i] != VK_ATTACHMENT_UNUSED)
         pvr_dereference_surface(ctx, attachments[i], subpass_num);
   }
}

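/* Schedule a single subpass: merge it into the HW render setup and update
 * the bookkeeping for attachment lifetimes, pending resolves and dependent
 * subpasses.
 */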
static VkResult pvr_schedule_subpass(const struct pvr_device *device,
                                     struct pvr_renderpass_context *ctx,
                                     uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass;
   struct pvr_renderpass_hwsetup_render *hw_render;
   struct pvr_render_int_subpass *int_subpass;
   struct pvr_render_subpass *subpass;
   uint32_t subpass_num;
   VkResult result;

   int_subpass = &ctx->int_subpasses[subpass_idx];
   subpass = int_subpass->subpass;

   result = pvr_merge_subpass(device, ctx, subpass, &hw_subpass);
   if (result != VK_SUCCESS)
      return result;

   hw_render = ctx->hw_render;
   subpass_num = hw_render->subpass_count - 1U;

   /* Record where the subpass was scheduled. */
   ctx->hw_setup->subpass_map[subpass_idx].render =
      ctx->hw_setup->render_count - 1U;
   ctx->hw_setup->subpass_map[subpass_idx].subpass = subpass_num;

   /* Check whether this subpass was the last use of any attachments. */
   pvr_dereference_color_output_list(ctx, subpass_num, subpass);
   pvr_dereference_surface_list(ctx,
                                subpass_num,
                                subpass->input_attachments,
                                subpass->input_count);
   if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) {
      struct pvr_render_int_attachment *int_depth_attach =
         &ctx->int_attach[subpass->depth_stencil_attachment];

      assert(int_depth_attach->remaining_count > 0U);
      int_depth_attach->remaining_count--;

      if (int_depth_attach->remaining_count == 0U) {
         if (int_depth_attach->first_use != -1)
            int_depth_attach->last_use = subpass_num;

         if (int_depth_attach->z_replicate)
            pvr_free_surface_storage(ctx, int_depth_attach);
      }

      if (int_depth_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         assert(int_depth_attach->stencil_remaining_count > 0U);
         int_depth_attach->stencil_remaining_count--;
      }

      /* The depth attachment has initialized data so load it from memory if
       * it is referenced again.
       */
      int_depth_attach->load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
      int_depth_attach->stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
   }

   /* Mark surfaces which have been the source or destination of an MSAA
    * resolve in the current render.
    */
   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      struct pvr_render_int_attachment *resolve_src;
      struct pvr_render_int_attachment *resolve_dst;

      if (!subpass->resolve_attachments)
         break;

      if (subpass->resolve_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      assert(subpass->color_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_src = &ctx->int_attach[subpass->color_attachments[i]];

      assert(subpass->resolve_attachments[i] <
             (int32_t)ctx->pass->attachment_count);
      resolve_dst = &ctx->int_attach[subpass->resolve_attachments[i]];

      /* Mark the resolve source. */
      assert(resolve_src->last_resolve_src_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_src->last_resolve_src_render = ctx->hw_setup->render_count - 1U;

      /* Mark the resolve destination. */
      assert(resolve_dst->last_resolve_dst_render <
             (int32_t)(ctx->hw_setup->render_count - 1U));
      resolve_dst->last_resolve_dst_render = ctx->hw_setup->render_count - 1U;

      /* If we can't downscale through the PBE then the source must be stored
       * so it can be downscaled with a transfer.
       */
      if (!resolve_src->is_pbe_downscalable &&
          resolve_src->last_read < (int32_t)ctx->pass->subpass_count) {
         resolve_src->last_read = (int32_t)ctx->pass->subpass_count;
         resolve_src->remaining_count++;
      }
   }

   /* For subpasses dependent on this subpass decrement the unscheduled
    * dependency count.
    */
   for (uint32_t i = 0U; i < int_subpass->out_subpass_count; i++) {
      struct pvr_render_int_subpass *int_dst_subpass =
         int_subpass->out_subpasses[i];

      assert(int_dst_subpass->in_subpass_count > 0U);
      int_dst_subpass->in_subpass_count--;
   }

   return VK_SUCCESS;
}

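/* Count how many times an attachment appears in a list of attachment
 * indices.
 */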
static uint32_t pvr_count_uses_in_list(uint32_t *attachments,
                                       uint32_t size,
                                       uint32_t attach_idx)
{
   uint32_t count = 0U;

   for (uint32_t i = 0U; i < size; i++) {
      if (attachments[i] == attach_idx)
         count++;
   }

   return count;
}

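/* Count how many times an attachment is used as a color output in a subpass;
 * an output which is also resolved counts twice.
 */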
static uint32_t
pvr_count_uses_in_color_output_list(struct pvr_render_subpass *subpass,
                                    uint32_t attach_idx)
{
   uint32_t count = 0U;

   for (uint32_t i = 0U; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == attach_idx) {
         count++;

         if (subpass->resolve_attachments &&
             subpass->resolve_attachments[i] != VK_ATTACHMENT_UNUSED)
            count++;
      }
   }

   return count;
}

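/* Free a HW render setup and everything it owns. Counterpart to
 * pvr_create_renderpass_hwsetup().
 */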
void pvr_destroy_renderpass_hwsetup(const VkAllocationCallbacks *alloc,
                                    struct pvr_renderpass_hwsetup *hw_setup)
{
   for (uint32_t i = 0U; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];

      vk_free(alloc, hw_render->eot_surfaces);
      vk_free(alloc, hw_render->eot_setup.mrt_resources);
      vk_free(alloc, hw_render->init_setup.mrt_resources);
      vk_free(alloc, hw_render->color_init);

      for (uint32_t j = 0U; j < hw_render->subpass_count; j++) {
         struct pvr_renderpass_hwsetup_subpass *subpass =
            &hw_render->subpasses[j];

         vk_free(alloc, subpass->color_initops);
         vk_free(alloc, subpass->input_access);
         vk_free(alloc, subpass->setup.mrt_resources);
      }

      vk_free(alloc, hw_render->subpasses);
   }

   vk_free(alloc, hw_setup->renders);
   vk_free(alloc, hw_setup);
}

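/* Build the HW render setup for a render pass: split the subpasses into HW
 * renders and allocate on-chip/tile-buffer storage for each attachment.
 *
 * Illustrative usage only; the caller-side names here are hypothetical:
 *
 *    struct pvr_renderpass_hwsetup *hw_setup;
 *    VkResult result =
 *       pvr_create_renderpass_hwsetup(device, alloc, pass, false, &hw_setup);
 *    if (result != VK_SUCCESS)
 *       return result;
 *    ...
 *    pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
 */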
VkResult pvr_create_renderpass_hwsetup(
   struct pvr_device *device,
   const VkAllocationCallbacks *alloc,
   struct pvr_render_pass *pass,
   bool disable_merge,
   struct pvr_renderpass_hwsetup **const hw_setup_out)
{
   struct pvr_render_int_attachment *int_attachments;
   struct pvr_render_int_subpass *int_subpasses;
   struct pvr_renderpass_hw_map *subpass_map;
   struct pvr_renderpass_hwsetup *hw_setup;
   struct pvr_renderpass_context *ctx;
   bool *surface_allocate;
   VkResult result;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &hw_setup, __typeof__(*hw_setup), 1);
   vk_multialloc_add(&ma,
                     &surface_allocate,
                     __typeof__(*surface_allocate),
                     pass->attachment_count);
   vk_multialloc_add(&ma,
                     &subpass_map,
                     __typeof__(*subpass_map),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   hw_setup->surface_allocate = surface_allocate;
   hw_setup->subpass_map = subpass_map;

   VK_MULTIALLOC(ma_ctx);
   vk_multialloc_add(&ma_ctx, &ctx, __typeof__(*ctx), 1);
   vk_multialloc_add(&ma_ctx,
                     &int_attachments,
                     __typeof__(*int_attachments),
                     pass->attachment_count);
   vk_multialloc_add(&ma_ctx,
                     &int_subpasses,
                     __typeof__(*int_subpasses),
                     pass->subpass_count);

   if (!vk_multialloc_zalloc(&ma_ctx,
                             alloc,
                             VK_SYSTEM_ALLOCATION_SCOPE_COMMAND)) {
      vk_free(alloc, hw_setup);
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   ctx->pass = pass;
   ctx->hw_setup = hw_setup;
   ctx->int_attach = int_attachments;
   ctx->int_subpasses = int_subpasses;
   ctx->allocator = alloc;

   for (uint32_t i = 0U; i < pass->attachment_count; i++) {
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];
      struct pvr_render_int_attachment *int_attach = &ctx->int_attach[i];
      const VkFormat format = attachment->vk_format;
      uint32_t pixel_size_in_chunks;
      uint32_t pixel_size_in_bits;

      /* TODO: Add support for packing multiple attachments into the same
       * register.
       */
      const uint32_t part_bits = 0;

      if (vk_format_is_color(format) &&
          pvr_get_pbe_accum_format(attachment->vk_format) ==
             PVR_PBE_ACCUM_FORMAT_INVALID) {
         /* The VkFormat is not supported as a color attachment so use `0`.
          * Vulkan doesn't seem to restrict vkCreateRenderPass() to supported
          * formats only.
          */
         pixel_size_in_bits = 0;
      } else {
         pixel_size_in_bits =
            pvr_get_accum_format_bitsize(attachment->vk_format);
      }

      int_attach->resource.type = USC_MRT_RESOURCE_TYPE_INVALID;
      int_attach->resource.intermediate_size =
         DIV_ROUND_UP(pixel_size_in_bits, CHAR_BIT);
      int_attach->resource.mrt_desc.intermediate_size =
         int_attach->resource.intermediate_size;

      pixel_size_in_chunks = DIV_ROUND_UP(pixel_size_in_bits, 32U);
      for (uint32_t j = 0U; j < pixel_size_in_chunks; j++)
         int_attach->resource.mrt_desc.valid_mask[j] = ~0;

      if (part_bits > 0U) {
         int_attach->resource.mrt_desc.valid_mask[pixel_size_in_chunks] =
            BITFIELD_MASK(part_bits);
      }

      int_attach->load_op = pass->attachments[i].load_op;
      int_attach->stencil_load_op = pass->attachments[i].stencil_load_op;
      int_attach->attachment = attachment;
      int_attach->first_use = -1;
      int_attach->last_use = -1;
      int_attach->last_read = -1;
      int_attach->mrt_idx = -1;
      int_attach->last_resolve_dst_render = -1;
      int_attach->last_resolve_src_render = -1;
      int_attach->z_replicate = false;
      int_attach->is_pbe_downscalable = attachment->is_pbe_downscalable;

      /* Count the number of references to this attachment in subpasses. */
      for (uint32_t j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_subpass *subpass = &pass->subpasses[j];
         const uint32_t color_output_uses =
            pvr_count_uses_in_color_output_list(subpass, i);
         const uint32_t input_attachment_uses =
            pvr_count_uses_in_list(subpass->input_attachments,
                                   subpass->input_count,
                                   i);

         if (color_output_uses != 0U || input_attachment_uses != 0U)
            int_attach->last_read = j;

         int_attach->remaining_count +=
            color_output_uses + input_attachment_uses;

         if ((uint32_t)subpass->depth_stencil_attachment == i)
            int_attach->remaining_count++;
      }

      if (int_attach->attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         int_attach->stencil_remaining_count = int_attach->remaining_count;
         if (pass->attachments[i].stencil_store_op ==
             VK_ATTACHMENT_STORE_OP_STORE) {
            int_attach->stencil_remaining_count++;
         }
      }

      if (pass->attachments[i].store_op == VK_ATTACHMENT_STORE_OP_STORE) {
         int_attach->remaining_count++;
         int_attach->last_read = pass->subpass_count;
      }
   }

   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      int_subpass->subpass = &pass->subpasses[i];
      int_subpass->out_subpass_count = 0U;
      int_subpass->out_subpasses = NULL;
      int_subpass->in_subpass_count = int_subpass->subpass->dep_count;
   }

   /* For each dependency of a subpass create an edge in the opposite
    * direction.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      for (uint32_t j = 0U; j < int_subpass->in_subpass_count; j++) {
         uint32_t src_idx = int_subpass->subpass->dep_list[j];
         struct pvr_render_int_subpass *int_src_subpass;
         struct pvr_render_int_subpass **out_subpasses;

         assert(src_idx < pass->subpass_count);

         int_src_subpass = &ctx->int_subpasses[src_idx];

         out_subpasses =
            vk_realloc(ctx->allocator,
                       int_src_subpass->out_subpasses,
                       sizeof(int_src_subpass->out_subpasses[0U]) *
                          (int_src_subpass->out_subpass_count + 1U),
                       8U,
                       VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
         if (!out_subpasses) {
            result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
            goto end_create_renderpass_hwsetup;
         }

         int_src_subpass->out_subpasses = out_subpasses;
         int_src_subpass->out_subpasses[int_src_subpass->out_subpass_count] =
            int_subpass;
         int_src_subpass->out_subpass_count++;
      }
   }

   pvr_reset_render(ctx);

   /* Schedule the subpasses in an order compatible with their dependencies:
    * on each iteration pick a subpass whose dependencies have all been
    * scheduled already.
    */
   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      uint32_t j;

      /* Find a subpass with no unscheduled dependencies. */
      for (j = 0U; j < pass->subpass_count; j++) {
         struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[j];

         if (int_subpass->subpass && int_subpass->in_subpass_count == 0U)
            break;
      }
      assert(j < pass->subpass_count);

      result = pvr_schedule_subpass(device, ctx, j);
      if (result != VK_SUCCESS)
         goto end_create_renderpass_hwsetup;

      if (disable_merge) {
         result = pvr_close_render(device, ctx);
         if (result != VK_SUCCESS)
            goto end_create_renderpass_hwsetup;
      }

      ctx->int_subpasses[j].subpass = NULL;
   }

   /* Finalise the last in-progress render. */
   result = pvr_close_render(device, ctx);

end_create_renderpass_hwsetup:
   if (result != VK_SUCCESS) {
      pvr_free_render(ctx);

      if (hw_setup) {
         pvr_destroy_renderpass_hwsetup(alloc, hw_setup);
         hw_setup = NULL;
      }
   }

   for (uint32_t i = 0U; i < pass->subpass_count; i++) {
      struct pvr_render_int_subpass *int_subpass = &ctx->int_subpasses[i];

      if (int_subpass->out_subpass_count > 0U)
         vk_free(alloc, int_subpass->out_subpasses);
   }

   vk_free(alloc, ctx);

   *hw_setup_out = hw_setup;

   return result;
}