/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdbool.h>
#include <stdint.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_device_info.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_types.h"
#include "usc/programs/pvr_usc_fragment_shader.h"
#include "util/macros.h"
#include "rogue/rogue.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static inline bool pvr_subpass_has_msaa_input_attachment(
   struct pvr_render_subpass *subpass,
   const VkRenderPassCreateInfo2 *pCreateInfo)
{
   for (uint32_t i = 0; i < subpass->input_count; i++) {
      const uint32_t attachment = subpass->input_attachments[i];

      if (pCreateInfo->pAttachments[attachment].samples > 1)
         return true;
   }

   return false;
}

static bool pvr_is_subpass_initops_flush_needed(
   const struct pvr_render_pass *pass,
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   struct pvr_render_subpass *subpass = &pass->subpasses[0];
   uint32_t render_loadop_mask = 0;
   uint32_t color_attachment_mask;

   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      if (hw_render->color_init[i].op != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         render_loadop_mask |= (1 << hw_render->color_init[i].index);
   }

   /* If there are no load ops then there's nothing to flush. */
   if (render_loadop_mask == 0)
      return false;

   /* If the first subpass has any input attachments, they need to be
    * initialized with the result of the load op. Since the input attachment
    * may be read from fragments with an opaque pass type, the load ops must be
    * flushed or else they would be obscured and eliminated by HSR.
    */
   if (subpass->input_count != 0)
      return true;

   color_attachment_mask = 0;

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t color_idx = subpass->color_attachments[i];

      if (color_idx != VK_ATTACHMENT_UNUSED)
         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
   }

   /* If the first subpass does not write to all attachments which have a load
    * op then the load ops need to be flushed to ensure they don't get obscured
    * and removed by HSR.
    */
   return (render_loadop_mask & color_attachment_mask) != render_loadop_mask;
}

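/* Assign each subpass its ISP userpass value: within a HW render it is simply
 * the subpass index (masked to the HW field width), offset by one when the
 * render's load ops need to be flushed, presumably so the flushed init work
 * and the first subpass fall into different userpasses.
 */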
static void
pvr_init_subpass_isp_userpass(struct pvr_renderpass_hwsetup *hw_setup,
                              struct pvr_render_pass *pass,
                              struct pvr_render_subpass *subpasses)
{
   uint32_t subpass_idx = 0;

   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
      const uint32_t initial_isp_userpass =
         (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render);

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         subpasses[subpass_idx].isp_userpass =
            (j + initial_isp_userpass) & ROGUE_CR_ISP_CTL_UPASS_START_SIZE_MAX;
         subpass_idx++;
      }
   }

   assert(subpass_idx == pass->subpass_count);
}

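/* Returns true if any of the render's MRT resources for the initial setup are
 * allocated to USC output registers (as opposed to tile buffers).
 */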
static inline bool pvr_has_output_register_writes(
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   for (uint32_t i = 0; i < hw_render->init_setup.num_render_targets; i++) {
      struct usc_mrt_resource *mrt_resource =
         &hw_render->init_setup.mrt_resources[i];

      if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG)
         return true;
   }

   return false;
}

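/* Generates the PDS pixel shader secondary attribute (uniform/texture state)
 * program for the given number of texture and uniform DMA kicks, and uploads
 * it, returning the upload info in pds_upload_out.
 */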
VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
 * similar. See if we can dedup them?
 */
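/* Builds the load op description for a single HW subpass: records, per render
 * target, which attachments need to be loaded or cleared (and their formats),
 * plus any replicated depth load/clear, in load_op->clears_loads_state.
 */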
static VkResult
pvr_create_subpass_load_op(struct pvr_device *device,
                           const VkAllocationCallbacks *allocator,
                           const struct pvr_render_pass *pass,
                           struct pvr_renderpass_hwsetup_render *hw_render,
                           uint32_t hw_subpass_idx,
                           struct pvr_load_op **const load_op_out)
{
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[hw_subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   if (hw_subpass->z_replicate != -1) {
      const int32_t z_replicate = hw_subpass->z_replicate;

      switch (hw_subpass->depth_initop) {
      case VK_ATTACHMENT_LOAD_OP_LOAD:
         assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
         load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
         load_op->clears_loads_state.dest_vk_format[z_replicate] =
            VK_FORMAT_D32_SFLOAT;
         break;

      case VK_ATTACHMENT_LOAD_OP_CLEAR:
         load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
         break;

      default:
         break;
      }
   }

   assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t attachment_idx = subpass->color_attachments[i];

      assert(attachment_idx < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[attachment_idx].vk_format;

      if (pass->attachments[attachment_idx].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = false;
   load_op->subpass = subpass;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

static VkResult
pvr_create_render_load_op(struct pvr_device *device,
                          const VkAllocationCallbacks *allocator,
                          const struct pvr_render_pass *pass,
                          const struct pvr_renderpass_hwsetup_render *hw_render,
                          struct pvr_load_op **const load_op_out)
{
   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];

      assert(color_init->index < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[color_init->index].vk_format;

      if (pass->attachments[color_init->index].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = true;
   load_op->hw_render = hw_render;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

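/* Uploads the (currently hardcoded) load op USC fragment shader and creates
 * the PDS fragment and uniform/texture state programs that launch it, filling
 * in the program handles and shared/temp register counts on load_op.
 */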
static VkResult
pvr_generate_load_op_shader(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_renderpass_hwsetup_render *hw_render,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pvr_usc_fragment_shader,
                                        sizeof(pvr_usc_fragment_shader),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS)
      return result;

   /* TODO: amend this once the hardcoded shaders have been removed. */
   struct pvr_fragment_shader_state fragment_state = {
      .bo = load_op->usc_frag_prog_bo,
      .sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
      .pds_fragment_program = load_op->pds_frag_prog,
   };

   result = pvr_pds_fragment_program_create_and_upload(device,
                                                       allocator,
                                                       NULL,
                                                       &fragment_state);
   load_op->usc_frag_prog_bo = fragment_state.bo;
   load_op->pds_frag_prog = fragment_state.pds_fragment_program;

   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   /* FIXME: These should be based on the USC and PDS programs, but are hard
    * coded for now.
    */
   load_op->const_shareds_count = 1;
   load_op->shareds_dest_offset = 0;
   load_op->shareds_count = 1;
   load_op->temps_count = 1;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}

static void pvr_load_op_destroy(struct pvr_device *device,
                                const VkAllocationCallbacks *allocator,
                                struct pvr_load_op *load_op)
{
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
   vk_free2(&device->vk.alloc, allocator, load_op);
}

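/* Number of buffers the SPM load can read from: the maximum tile buffer
 * count, minus four on cores with the eight_output_registers feature
 * (presumably because more colour data fits in output registers there).
 */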
#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)               \
   ({                                                         \
      int __ret = PVR_MAX_TILE_BUFFER_COUNT;                  \
      if (PVR_HAS_FEATURE(dev_info, eight_output_registers))  \
         __ret -= 4U;                                         \
      __ret;                                                  \
   })

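/* Returns true if the HW subpass needs a load op program, i.e. if any of its
 * colour attachments, or its replicated depth, has to be loaded or cleared at
 * the start of the subpass.
 */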
static bool
pvr_is_load_op_needed(const struct pvr_render_pass *pass,
                      struct pvr_renderpass_hwsetup_render *hw_render,
                      const uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   if (hw_subpass->z_replicate != -1 &&
       (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD ||
        hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)) {
      return true;
   }

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD ||
          hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         return true;
      }
   }

   return false;
}

VkResult pvr_CreateRenderPass2(VkDevice _device,
                               const VkRenderPassCreateInfo2 *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkRenderPass *pRenderPass)
{
   struct pvr_render_pass_attachment *attachments;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_subpass *subpasses;
   const VkAllocationCallbacks *alloc;
   size_t subpass_attachment_count;
   uint32_t *subpass_attachments;
   struct pvr_render_pass *pass;
   uint32_t *dep_list;
   bool *flush_on_dep;
   VkResult result;

   alloc = pAllocator ? pAllocator : &device->vk.alloc;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &subpasses,
                     __typeof__(*subpasses),
                     pCreateInfo->subpassCount);

   subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   vk_multialloc_add(&ma,
                     &subpass_attachments,
                     __typeof__(*subpass_attachments),
                     subpass_attachment_count);
   vk_multialloc_add(&ma,
                     &dep_list,
                     __typeof__(*dep_list),
                     pCreateInfo->dependencyCount);
   vk_multialloc_add(&ma,
                     &flush_on_dep,
                     __typeof__(*flush_on_dep),
                     pCreateInfo->dependencyCount);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = attachments;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = subpasses;
   pass->max_sample_count = 1;

   /* Copy attachment descriptions. */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];

      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));

      attachment->load_op = desc->loadOp;
      attachment->store_op = desc->storeOp;

      attachment->aspects = vk_format_aspects(desc->format);
      if (attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         attachment->stencil_load_op = desc->stencilLoadOp;
         attachment->stencil_store_op = desc->stencilStoreOp;
      }

      attachment->vk_format = desc->format;
      attachment->sample_count = desc->samples;
      attachment->initial_layout = desc->initialLayout;
      attachment->is_pbe_downscalable =
         pvr_format_is_pbe_downscalable(attachment->vk_format);
      attachment->index = i;

      if (attachment->sample_count > pass->max_sample_count)
         pass->max_sample_count = attachment->sample_count;
   }

   /* Count how many dependencies each subpass has. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         pass->subpasses[dep->dstSubpass].dep_count++;
      }
   }

   /* Assign each subpass its slices of the shared lists and fill in the
    * attachment lists. We need to re-walk the dependencies array later to
    * fill in the per-subpass dependency lists.
    */
   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct pvr_render_subpass *subpass = &pass->subpasses[i];

      subpass->pipeline_bind_point = desc->pipelineBindPoint;

      /* From the Vulkan spec. 1.3.265
       * VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872:
       *
       *   "If none of the VK_AMD_mixed_attachment_samples extension, the
       *   VK_NV_framebuffer_mixed_samples extension, or the
       *   multisampledRenderToSingleSampled feature are enabled, all
       *   attachments in pDepthStencilAttachment or pColorAttachments that are
       *   not VK_ATTACHMENT_UNUSED must have the same sample count"
       */
      subpass->sample_count = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;

      if (desc->pDepthStencilAttachment) {
         uint32_t index = desc->pDepthStencilAttachment->attachment;

         if (index != VK_ATTACHMENT_UNUSED)
            subpass->sample_count = pass->attachments[index].sample_count;

         subpass->depth_stencil_attachment = index;
      } else {
         subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
      }

      subpass->color_count = desc->colorAttachmentCount;
      if (subpass->color_count > 0) {
         subpass->color_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->color_attachments[j] =
               desc->pColorAttachments[j].attachment;

            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
               continue;

            if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
               uint32_t index;
               index = subpass->color_attachments[j];
               subpass->sample_count = pass->attachments[index].sample_count;
            }
         }
      }

      if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)
         subpass->sample_count = VK_SAMPLE_COUNT_1_BIT;

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->resolve_attachments[j] =
               desc->pResolveAttachments[j].attachment;
         }
      }

      subpass->input_count = desc->inputAttachmentCount;
      if (subpass->input_count > 0) {
         subpass->input_attachments = subpass_attachments;
         subpass_attachments += subpass->input_count;

         for (uint32_t j = 0; j < subpass->input_count; j++) {
            subpass->input_attachments[j] =
               desc->pInputAttachments[j].attachment;
         }
      }

      /* Give the subpass a slice of the dep_list and flush_on_dep arrays. */
      subpass->dep_list = dep_list;
      dep_list += subpass->dep_count;
      subpass->flush_on_dep = flush_on_dep;
      flush_on_dep += subpass->dep_count;

      /* Reset the dependencies count so we can start from 0 and index into
       * the dependencies array.
       */
      subpass->dep_count = 0;
      subpass->index = i;
   }

   /* Compute dependencies and populate dep_list and flush_on_dep. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
         bool is_dep_fb_local =
            vk_subpass_dependency_is_fb_local(dep,
                                              dep->srcStageMask,
                                              dep->dstStageMask);

         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo) ||
             !is_dep_fb_local) {
            subpass->flush_on_dep[subpass->dep_count] = true;
         }

         subpass->dep_count++;
      }
   }

   pass->max_tilebuffer_count =
      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);

   result =
      pvr_create_renderpass_hwsetup(device, alloc, pass, false, &pass->hw_setup);
   if (result != VK_SUCCESS)
      goto err_free_pass;

   pvr_init_subpass_isp_userpass(pass->hw_setup, pass, pass->subpasses);

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];
      struct pvr_load_op *load_op = NULL;

      if (hw_render->tile_buffers_count) {
         result = pvr_device_tile_buffer_ensure_cap(
            device,
            hw_render->tile_buffers_count,
            hw_render->eot_setup.tile_buffer_size);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;
      }

      assert(!hw_render->load_op);

      if (hw_render->color_init_count != 0U) {
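         /* If none of the existing MRT resources use output registers, append
          * a dummy one targeting output register 0, presumably so the load op
          * always has at least one output register write.
          */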
         if (!pvr_has_output_register_writes(hw_render)) {
            const uint32_t last = hw_render->init_setup.num_render_targets;
            struct usc_mrt_resource *mrt_resources;

            hw_render->init_setup.num_render_targets++;

            mrt_resources =
               vk_realloc(alloc,
                          hw_render->init_setup.mrt_resources,
                          hw_render->init_setup.num_render_targets *
                             sizeof(*mrt_resources),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
            if (!mrt_resources) {
               result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
               goto err_load_op_destroy;
            }

            hw_render->init_setup.mrt_resources = mrt_resources;

            mrt_resources[last].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
            mrt_resources[last].reg.output_reg = 0U;
            mrt_resources[last].reg.offset = 0U;
            mrt_resources[last].intermediate_size = 4U;
            mrt_resources[last].mrt_desc.intermediate_size = 4U;
            mrt_resources[last].mrt_desc.priority = 0U;
            mrt_resources[last].mrt_desc.valid_mask[0U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[1U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[2U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
         }

         result = pvr_create_render_load_op(device,
                                            pAllocator,
                                            pass,
                                            hw_render,
                                            &load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         hw_render->load_op = load_op;
      }

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (!pvr_is_load_op_needed(pass, hw_render, j))
            continue;

         result = pvr_create_subpass_load_op(device,
                                             pAllocator,
                                             pass,
                                             hw_render,
                                             j,
                                             &load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         hw_render->subpasses[j].load_op = load_op;
      }
   }

   *pRenderPass = pvr_render_pass_to_handle(pass);

   return VK_SUCCESS;

err_load_op_destroy:
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(alloc, pass->hw_setup);

err_free_pass:
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);

   return result;
}

void pvr_DestroyRenderPass(VkDevice _device,
                           VkRenderPass _pass,
                           const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);

   if (!pass)
      return;

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
                                  pass->hw_setup);
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);
}

void pvr_GetRenderAreaGranularity(VkDevice _device,
                                  VkRenderPass renderPass,
                                  VkExtent2D *pGranularity)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* Granularity does not depend on any settings in the render pass, so return
    * the tile granularity.
    *
    * The default value is based on the minimum value found in all existing
    * cores.
    */
   pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16);
   pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16);
}