/*
 * Copyright © 2022 Imagination Technologies Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <stdbool.h>
#include <stdint.h>

#include "hwdef/rogue_hw_utils.h"
#include "pvr_bo.h"
#include "pvr_device_info.h"
#include "pvr_formats.h"
#include "pvr_hw_pass.h"
#include "pvr_pds.h"
#include "pvr_private.h"
#include "pvr_types.h"
#include "usc/programs/pvr_usc_fragment_shader.h"
#include "util/macros.h"
#include "rogue/rogue.h"
#include "vk_alloc.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

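/* Returns true if any of the subpass's input attachments is multisampled,
 * according to the attachment descriptions in pCreateInfo.
 */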
static inline bool pvr_subpass_has_msaa_input_attachment(
   struct pvr_render_subpass *subpass,
   const VkRenderPassCreateInfo2 *pCreateInfo)
{
   for (uint32_t i = 0; i < subpass->input_count; i++) {
      const uint32_t attachment = subpass->input_attachments[i];

      /* Unused input attachment slots have no description to look at. */
      if (attachment == VK_ATTACHMENT_UNUSED)
         continue;

      if (pCreateInfo->pAttachments[attachment].samples > 1)
         return true;
   }

   return false;
}

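/* Decides whether a HW render's load ops must be flushed, i.e. given their
 * own ISP user pass number, so that hidden surface removal cannot treat
 * them as obscured and eliminate them (see the comments inside).
 */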
static bool pvr_is_subpass_initops_flush_needed(
   const struct pvr_render_pass *pass,
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   struct pvr_render_subpass *subpass = &pass->subpasses[0];
   uint32_t render_loadop_mask = 0;
   uint32_t color_attachment_mask;

   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      if (hw_render->color_init[i].op != VK_ATTACHMENT_LOAD_OP_DONT_CARE)
         render_loadop_mask |= (1 << hw_render->color_init[i].index);
   }

   /* If there are no load ops then there's nothing to flush. */
   if (render_loadop_mask == 0)
      return false;

   /* If the first subpass has any input attachments, they need to be
    * initialized with the result of the load op. Since the input attachment
    * may be read from fragments with an opaque pass type, the load ops must be
    * flushed or else they would be obscured and eliminated by HSR.
    */
   if (subpass->input_count != 0)
      return true;

   color_attachment_mask = 0;

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t color_idx = subpass->color_attachments[i];

      if (color_idx != VK_ATTACHMENT_UNUSED)
         color_attachment_mask |= (1 << pass->attachments[color_idx].index);
   }

   /* If the first subpass does not write to all attachments which have a load
    * op then the load ops need to be flushed to ensure they don't get obscured
    * and removed by HSR.
    */
   return (render_loadop_mask & color_attachment_mask) != render_loadop_mask;
}

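/* Assigns each subpass its ISP user pass number. Numbering restarts for
 * every HW render, starts at 1 instead of 0 when that render's load ops
 * must be flushed, and is masked with ROGUE_CR_ISP_CTL_UPASS_START_SIZE_MAX
 * to stay within the hardware's range.
 */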
static void
pvr_init_subpass_isp_userpass(struct pvr_renderpass_hwsetup *hw_setup,
                              struct pvr_render_pass *pass,
                              struct pvr_render_subpass *subpasses)
{
   uint32_t subpass_idx = 0;

   for (uint32_t i = 0; i < hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render = &hw_setup->renders[i];
      const uint32_t initial_isp_userpass =
         (uint32_t)pvr_is_subpass_initops_flush_needed(pass, hw_render);

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         subpasses[subpass_idx].isp_userpass =
            (j + initial_isp_userpass) & ROGUE_CR_ISP_CTL_UPASS_START_SIZE_MAX;
         subpass_idx++;
      }
   }

   assert(subpass_idx == pass->subpass_count);
}

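/* Returns true if any render target in the render's init setup is backed by
 * USC output registers rather than another MRT resource type (e.g. a tile
 * buffer).
 */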
static inline bool pvr_has_output_register_writes(
   const struct pvr_renderpass_hwsetup_render *hw_render)
{
   for (uint32_t i = 0; i < hw_render->init_setup.num_render_targets; i++) {
      struct usc_mrt_resource *mrt_resource =
         &hw_render->init_setup.mrt_resources[i];

      if (mrt_resource->type == USC_MRT_RESOURCE_TYPE_OUTPUT_REG)
         return true;
   }

   return false;
}

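/* Creates and uploads the PDS program that sets up a pixel shader's uniform
 * and texture state, sized for the requested number of texture and uniform
 * DMA kicks.
 */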
VkResult pvr_pds_unitex_state_program_create_and_upload(
   struct pvr_device *device,
   const VkAllocationCallbacks *allocator,
   uint32_t texture_kicks,
   uint32_t uniform_kicks,
   struct pvr_pds_upload *const pds_upload_out)
{
   struct pvr_pds_pixel_shader_sa_program program = {
      .num_texture_dma_kicks = texture_kicks,
      .num_uniform_dma_kicks = uniform_kicks,
   };
   uint32_t staging_buffer_size;
   uint32_t *staging_buffer;
   VkResult result;

   pvr_pds_set_sizes_pixel_shader_uniform_texture_code(&program);

   staging_buffer_size = PVR_DW_TO_BYTES(program.code_size);

   staging_buffer = vk_alloc2(&device->vk.alloc,
                              allocator,
                              staging_buffer_size,
                              8U,
                              VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!staging_buffer)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   pvr_pds_generate_pixel_shader_sa_code_segment(&program, staging_buffer);

   /* FIXME: Figure out the define for alignment of 16. */
   result = pvr_gpu_upload_pds(device,
                               NULL,
                               0U,
                               0U,
                               staging_buffer,
                               program.code_size,
                               16U,
                               16U,
                               pds_upload_out);
   if (result != VK_SUCCESS) {
      vk_free2(&device->vk.alloc, allocator, staging_buffer);
      return result;
   }

   vk_free2(&device->vk.alloc, allocator, staging_buffer);

   return VK_SUCCESS;
}

/* TODO: pvr_create_subpass_load_op() and pvr_create_render_load_op() are quite
 * similar. See if we can dedup them?
 */
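/* Builds the clears/loads state for one HW subpass: which render targets are
 * loaded or cleared, their destination formats, which of them are unresolved
 * MSAA sources, and whether replicated depth is loaded or cleared to a
 * register.
 */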
static VkResult
pvr_create_subpass_load_op(struct pvr_device *device,
                           const VkAllocationCallbacks *allocator,
                           const struct pvr_render_pass *pass,
                           struct pvr_renderpass_hwsetup_render *hw_render,
                           uint32_t hw_subpass_idx,
                           struct pvr_load_op **const load_op_out)
{
   const struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[hw_subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   if (hw_subpass->z_replicate != -1) {
      const int32_t z_replicate = hw_subpass->z_replicate;

      switch (hw_subpass->depth_initop) {
      case VK_ATTACHMENT_LOAD_OP_LOAD:
         assert(z_replicate < PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
         load_op->clears_loads_state.rt_load_mask = BITFIELD_BIT(z_replicate);
         load_op->clears_loads_state.dest_vk_format[z_replicate] =
            VK_FORMAT_D32_SFLOAT;
         break;

      case VK_ATTACHMENT_LOAD_OP_CLEAR:
         load_op->clears_loads_state.depth_clear_to_reg = z_replicate;
         break;

      default:
         break;
      }
   }

   assert(subpass->color_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      const uint32_t attachment_idx = subpass->color_attachments[i];

      assert(attachment_idx < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[attachment_idx].vk_format;

      if (pass->attachments[attachment_idx].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = false;
   load_op->subpass = subpass;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

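/* Render-level counterpart of pvr_create_subpass_load_op(): the clears/loads
 * state is derived from the HW render's color init ops rather than a
 * subpass's init ops.
 */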
static VkResult
pvr_create_render_load_op(struct pvr_device *device,
                          const VkAllocationCallbacks *allocator,
                          const struct pvr_render_pass *pass,
                          const struct pvr_renderpass_hwsetup_render *hw_render,
                          struct pvr_load_op **const load_op_out)
{
   struct pvr_load_op *load_op = vk_zalloc2(&device->vk.alloc,
                                            allocator,
                                            sizeof(*load_op),
                                            8,
                                            VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!load_op)
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   load_op->clears_loads_state.depth_clear_to_reg = PVR_NO_DEPTH_CLEAR_TO_REG;

   assert(hw_render->color_init_count <= PVR_LOAD_OP_CLEARS_LOADS_MAX_RTS);
   for (uint32_t i = 0; i < hw_render->color_init_count; i++) {
      struct pvr_renderpass_colorinit *color_init = &hw_render->color_init[i];

      assert(color_init->index < pass->attachment_count);
      load_op->clears_loads_state.dest_vk_format[i] =
         pass->attachments[color_init->index].vk_format;

      if (pass->attachments[color_init->index].sample_count > 1)
         load_op->clears_loads_state.unresolved_msaa_mask |= BITFIELD_BIT(i);

      if (color_init->op == VK_ATTACHMENT_LOAD_OP_LOAD)
         load_op->clears_loads_state.rt_load_mask |= BITFIELD_BIT(i);
      else if (color_init->op == VK_ATTACHMENT_LOAD_OP_CLEAR)
         load_op->clears_loads_state.rt_clear_mask |= BITFIELD_BIT(i);
   }

   load_op->is_hw_object = true;
   load_op->hw_render = hw_render;

   *load_op_out = load_op;

   return VK_SUCCESS;
}

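/* Uploads the USC fragment shader used by a load op and creates/uploads the
 * PDS fragment program that launches it, plus the PDS program for its
 * uniform/texture state.
 */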
static VkResult
pvr_generate_load_op_shader(struct pvr_device *device,
                            const VkAllocationCallbacks *allocator,
                            struct pvr_renderpass_hwsetup_render *hw_render,
                            struct pvr_load_op *load_op)
{
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;
   const uint32_t cache_line_size = rogue_get_slc_cache_line_size(dev_info);

   VkResult result = pvr_gpu_upload_usc(device,
                                        pvr_usc_fragment_shader,
                                        sizeof(pvr_usc_fragment_shader),
                                        cache_line_size,
                                        &load_op->usc_frag_prog_bo);
   if (result != VK_SUCCESS)
      return result;

   /* TODO: amend this once the hardcoded shaders have been removed. */
   struct pvr_fragment_shader_state fragment_state = {
      .bo = load_op->usc_frag_prog_bo,
      .sample_rate = ROGUE_PDSINST_DOUTU_SAMPLE_RATE_INSTANCE,
      .pds_fragment_program = load_op->pds_frag_prog,
   };

   result = pvr_pds_fragment_program_create_and_upload(device,
                                                       allocator,
                                                       NULL,
                                                       &fragment_state);
   load_op->usc_frag_prog_bo = fragment_state.bo;
   load_op->pds_frag_prog = fragment_state.pds_fragment_program;

   if (result != VK_SUCCESS)
      goto err_free_usc_frag_prog_bo;

   result = pvr_pds_unitex_state_program_create_and_upload(
      device,
      allocator,
      1U,
      0U,
      &load_op->pds_tex_state_prog);
   if (result != VK_SUCCESS)
      goto err_free_pds_frag_prog;

   /* FIXME: These should be based on the USC and PDS programs, but are hard
    * coded for now.
    */
   load_op->const_shareds_count = 1;
   load_op->shareds_dest_offset = 0;
   load_op->shareds_count = 1;
   load_op->temps_count = 1;

   return VK_SUCCESS;

err_free_pds_frag_prog:
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);

err_free_usc_frag_prog_bo:
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);

   return result;
}

static void pvr_load_op_destroy(struct pvr_device *device,
                                const VkAllocationCallbacks *allocator,
                                struct pvr_load_op *load_op)
{
   pvr_bo_suballoc_free(load_op->pds_tex_state_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->pds_frag_prog.pvr_bo);
   pvr_bo_suballoc_free(load_op->usc_frag_prog_bo);
   vk_free2(&device->vk.alloc, allocator, load_op);
}

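/* Number of tile buffers the SPM load programs may need to read back.
 * Presumably cores with the eight_output_registers feature can keep four
 * more values in output registers and so need four fewer tile buffers.
 */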
#define PVR_SPM_LOAD_IN_BUFFERS_COUNT(dev_info)              \
   ({                                                        \
      int __ret = PVR_MAX_TILE_BUFFER_COUNT;                 \
      if (PVR_HAS_FEATURE(dev_info, eight_output_registers)) \
         __ret -= 4U;                                        \
      __ret;                                                 \
   })

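/* Returns true if the HW subpass loads or clears replicated depth or any of
 * its color attachments, i.e. if a load op object must be created for it.
 */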
static bool
pvr_is_load_op_needed(const struct pvr_render_pass *pass,
                      struct pvr_renderpass_hwsetup_render *hw_render,
                      const uint32_t subpass_idx)
{
   struct pvr_renderpass_hwsetup_subpass *hw_subpass =
      &hw_render->subpasses[subpass_idx];
   const struct pvr_render_subpass *subpass =
      &pass->subpasses[hw_subpass->index];

   if (hw_subpass->z_replicate != -1 &&
       (hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_LOAD ||
        hw_subpass->depth_initop == VK_ATTACHMENT_LOAD_OP_CLEAR)) {
      return true;
   }

   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i] == VK_ATTACHMENT_UNUSED)
         continue;

      if (hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_LOAD ||
          hw_subpass->color_initops[i] == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         return true;
      }
   }

   return false;
}

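/* Creates the driver render pass object: copies the attachment and subpass
 * descriptions, derives per-subpass sample counts and dependency lists, then
 * builds the HW render setup and the load op objects and shaders it needs.
 */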
VkResult pvr_CreateRenderPass2(VkDevice _device,
                               const VkRenderPassCreateInfo2 *pCreateInfo,
                               const VkAllocationCallbacks *pAllocator,
                               VkRenderPass *pRenderPass)
{
   struct pvr_render_pass_attachment *attachments;
   PVR_FROM_HANDLE(pvr_device, device, _device);
   struct pvr_render_subpass *subpasses;
   const VkAllocationCallbacks *alloc;
   size_t subpass_attachment_count;
   uint32_t *subpass_attachments;
   struct pvr_render_pass *pass;
   uint32_t *dep_list;
   bool *flush_on_dep;
   VkResult result;

   alloc = pAllocator ? pAllocator : &device->vk.alloc;

   VK_MULTIALLOC(ma);
   vk_multialloc_add(&ma, &pass, __typeof__(*pass), 1);
   vk_multialloc_add(&ma,
                     &attachments,
                     __typeof__(*attachments),
                     pCreateInfo->attachmentCount);
   vk_multialloc_add(&ma,
                     &subpasses,
                     __typeof__(*subpasses),
                     pCreateInfo->subpassCount);

   subpass_attachment_count = 0;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0);
   }

   vk_multialloc_add(&ma,
                     &subpass_attachments,
                     __typeof__(*subpass_attachments),
                     subpass_attachment_count);
   vk_multialloc_add(&ma,
                     &dep_list,
                     __typeof__(*dep_list),
                     pCreateInfo->dependencyCount);
   vk_multialloc_add(&ma,
                     &flush_on_dep,
                     __typeof__(*flush_on_dep),
                     pCreateInfo->dependencyCount);

   if (!vk_multialloc_zalloc(&ma, alloc, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT))
      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &pass->base, VK_OBJECT_TYPE_RENDER_PASS);
   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->attachments = attachments;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->subpasses = subpasses;
   pass->max_sample_count = 1;

   /* Copy attachment descriptions. */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      const VkAttachmentDescription2 *desc = &pCreateInfo->pAttachments[i];
      struct pvr_render_pass_attachment *attachment = &pass->attachments[i];

      pvr_assert(!(desc->flags & ~VK_ATTACHMENT_DESCRIPTION_MAY_ALIAS_BIT));

      attachment->load_op = desc->loadOp;
      attachment->store_op = desc->storeOp;

      attachment->aspects = vk_format_aspects(desc->format);
      if (attachment->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         attachment->stencil_load_op = desc->stencilLoadOp;
         attachment->stencil_store_op = desc->stencilStoreOp;
      }

      attachment->vk_format = desc->format;
      attachment->sample_count = desc->samples;
      attachment->initial_layout = desc->initialLayout;
      attachment->is_pbe_downscalable =
         pvr_format_is_pbe_downscalable(attachment->vk_format);
      attachment->index = i;

      if (attachment->sample_count > pass->max_sample_count)
         pass->max_sample_count = attachment->sample_count;
   }

   /* Count how many dependencies each subpass has. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         pass->subpasses[dep->dstSubpass].dep_count++;
      }
   }

   /* Assign the subpasses their slices of the per-subpass arrays and fill in
    * the attachment lists. The dependencies array is re-walked later to fill
    * in the per-subpass dependency lists.
    */
   for (uint32_t i = 0; i < pass->subpass_count; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      struct pvr_render_subpass *subpass = &pass->subpasses[i];

      subpass->pipeline_bind_point = desc->pipelineBindPoint;

      /* From the Vulkan spec. 1.3.265
       * VUID-VkSubpassDescription2-multisampledRenderToSingleSampled-06872:
       *
       *   "If none of the VK_AMD_mixed_attachment_samples extension, the
       *   VK_NV_framebuffer_mixed_samples extension, or the
       *   multisampledRenderToSingleSampled feature are enabled, all
       *   attachments in pDepthStencilAttachment or pColorAttachments that are
       *   not VK_ATTACHMENT_UNUSED must have the same sample count"
       */
      subpass->sample_count = VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM;

      if (desc->pDepthStencilAttachment) {
         uint32_t index = desc->pDepthStencilAttachment->attachment;

         if (index != VK_ATTACHMENT_UNUSED)
            subpass->sample_count = pass->attachments[index].sample_count;

         subpass->depth_stencil_attachment = index;
      } else {
         subpass->depth_stencil_attachment = VK_ATTACHMENT_UNUSED;
      }

      subpass->color_count = desc->colorAttachmentCount;
      if (subpass->color_count > 0) {
         subpass->color_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->color_attachments[j] =
               desc->pColorAttachments[j].attachment;

            if (subpass->color_attachments[j] == VK_ATTACHMENT_UNUSED)
               continue;

            if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM) {
               const uint32_t index = subpass->color_attachments[j];
               subpass->sample_count = pass->attachments[index].sample_count;
            }
         }
      }

      if (subpass->sample_count == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM)
         subpass->sample_count = VK_SAMPLE_COUNT_1_BIT;

      if (desc->pResolveAttachments) {
         subpass->resolve_attachments = subpass_attachments;
         subpass_attachments += subpass->color_count;

         for (uint32_t j = 0; j < subpass->color_count; j++) {
            subpass->resolve_attachments[j] =
               desc->pResolveAttachments[j].attachment;
         }
      }

      subpass->input_count = desc->inputAttachmentCount;
      if (subpass->input_count > 0) {
         subpass->input_attachments = subpass_attachments;
         subpass_attachments += subpass->input_count;

         for (uint32_t j = 0; j < subpass->input_count; j++) {
            subpass->input_attachments[j] =
               desc->pInputAttachments[j].attachment;
         }
      }

      /* Give the subpass its slices of the dep_list and flush_on_dep arrays. */
      subpass->dep_list = dep_list;
      dep_list += subpass->dep_count;
      subpass->flush_on_dep = flush_on_dep;
      flush_on_dep += subpass->dep_count;

      /* Reset the dependencies count so we can start from 0 and index into
       * the dependencies array.
       */
      subpass->dep_count = 0;
      subpass->index = i;
   }

   /* Compute dependencies and populate dep_list and flush_on_dep. */
   for (uint32_t i = 0; i < pCreateInfo->dependencyCount; i++) {
      const VkSubpassDependency2 *dep = &pCreateInfo->pDependencies[i];

      if (dep->srcSubpass != VK_SUBPASS_EXTERNAL &&
          dep->dstSubpass != VK_SUBPASS_EXTERNAL &&
          dep->srcSubpass != dep->dstSubpass) {
         struct pvr_render_subpass *subpass = &pass->subpasses[dep->dstSubpass];
         bool is_dep_fb_local =
            vk_subpass_dependency_is_fb_local(dep,
                                              dep->srcStageMask,
                                              dep->dstStageMask);

         subpass->dep_list[subpass->dep_count] = dep->srcSubpass;
         if (pvr_subpass_has_msaa_input_attachment(subpass, pCreateInfo) ||
             !is_dep_fb_local) {
            subpass->flush_on_dep[subpass->dep_count] = true;
         }

         subpass->dep_count++;
      }
   }

   pass->max_tilebuffer_count =
      PVR_SPM_LOAD_IN_BUFFERS_COUNT(&device->pdevice->dev_info);

   result =
      pvr_create_renderpass_hwsetup(device, alloc, pass, false, &pass->hw_setup);
   if (result != VK_SUCCESS)
      goto err_free_pass;

   pvr_init_subpass_isp_userpass(pass->hw_setup, pass, pass->subpasses);

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];
      struct pvr_load_op *load_op = NULL;

      if (hw_render->tile_buffers_count) {
         result = pvr_device_tile_buffer_ensure_cap(
            device,
            hw_render->tile_buffers_count,
            hw_render->eot_setup.tile_buffer_size);
         /* The HW setup already exists here, so it must be destroyed too. */
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;
      }

      assert(!hw_render->load_op);

      if (hw_render->color_init_count != 0U) {
         if (!pvr_has_output_register_writes(hw_render)) {
            const uint32_t last = hw_render->init_setup.num_render_targets;
            struct usc_mrt_resource *mrt_resources;

            hw_render->init_setup.num_render_targets++;

            mrt_resources =
               vk_realloc(alloc,
                          hw_render->init_setup.mrt_resources,
                          hw_render->init_setup.num_render_targets *
                             sizeof(*mrt_resources),
                          8U,
                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
            if (!mrt_resources) {
               result = vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
               goto err_load_op_destroy;
            }

            hw_render->init_setup.mrt_resources = mrt_resources;

            mrt_resources[last].type = USC_MRT_RESOURCE_TYPE_OUTPUT_REG;
            mrt_resources[last].reg.output_reg = 0U;
            mrt_resources[last].reg.offset = 0U;
            mrt_resources[last].intermediate_size = 4U;
            mrt_resources[last].mrt_desc.intermediate_size = 4U;
            mrt_resources[last].mrt_desc.priority = 0U;
            mrt_resources[last].mrt_desc.valid_mask[0U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[1U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[2U] = ~0;
            mrt_resources[last].mrt_desc.valid_mask[3U] = ~0;
         }

         result = pvr_create_render_load_op(device,
                                            pAllocator,
                                            pass,
                                            hw_render,
                                            &load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS) {
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         hw_render->load_op = load_op;
      }

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (!pvr_is_load_op_needed(pass, hw_render, j))
            continue;

         result = pvr_create_subpass_load_op(device,
                                             pAllocator,
                                             pass,
                                             hw_render,
                                             j,
                                             &load_op);
         if (result != VK_SUCCESS)
            goto err_load_op_destroy;

         result =
            pvr_generate_load_op_shader(device, pAllocator, hw_render, load_op);
         if (result != VK_SUCCESS) {
            /* Not yet stored in the subpass, so free it here like the
             * render-level path above does.
             */
            vk_free2(&device->vk.alloc, pAllocator, load_op);
            goto err_load_op_destroy;
         }

         hw_render->subpasses[j].load_op = load_op;
      }
   }

   *pRenderPass = pvr_render_pass_to_handle(pass);

   return VK_SUCCESS;

err_load_op_destroy:
   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(alloc, pass->hw_setup);

err_free_pass:
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);

   return result;
}

void pvr_DestroyRenderPass(VkDevice _device,
                           VkRenderPass _pass,
                           const VkAllocationCallbacks *pAllocator)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   PVR_FROM_HANDLE(pvr_render_pass, pass, _pass);

   if (!pass)
      return;

   for (uint32_t i = 0; i < pass->hw_setup->render_count; i++) {
      struct pvr_renderpass_hwsetup_render *hw_render =
         &pass->hw_setup->renders[i];

      for (uint32_t j = 0; j < hw_render->subpass_count; j++) {
         if (hw_render->subpasses[j].load_op) {
            pvr_load_op_destroy(device,
                                pAllocator,
                                hw_render->subpasses[j].load_op);
         }
      }

      if (hw_render->load_op)
         pvr_load_op_destroy(device, pAllocator, hw_render->load_op);
   }

   pvr_destroy_renderpass_hwsetup(pAllocator ? pAllocator : &device->vk.alloc,
                                  pass->hw_setup);
   vk_object_base_finish(&pass->base);
   vk_free2(&device->vk.alloc, pAllocator, pass);
}

void pvr_GetRenderAreaGranularity(VkDevice _device,
                                  VkRenderPass renderPass,
                                  VkExtent2D *pGranularity)
{
   PVR_FROM_HANDLE(pvr_device, device, _device);
   const struct pvr_device_info *dev_info = &device->pdevice->dev_info;

   /* Granularity does not depend on any settings in the render pass, so return
    * the tile granularity.
    *
    * The default value is based on the minimum value found in all existing
    * cores.
    */
   pGranularity->width = PVR_GET_FEATURE_VALUE(dev_info, tile_size_x, 16);
   pGranularity->height = PVR_GET_FEATURE_VALUE(dev_info, tile_size_y, 16);
}