1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  * SPDX-License-Identifier: MIT
5  *
6  * based in part on anv driver which is:
7  * Copyright © 2015 Intel Corporation
8  */
9 
10 #include "tu_pass.h"
11 
12 #include "vk_util.h"
13 
14 #include "tu_cmd_buffer.h"
15 #include "tu_device.h"
16 #include "tu_image.h"
17 
18 /* Return true if we have to fall back to sysmem rendering because the
19  * dependency can't be satisfied with tiled rendering.
20  */
21 
22 static bool
23 dep_invalid_for_gmem(const VkSubpassDependency2 *dep,
24                      VkPipelineStageFlags2 src_stage_mask,
25                      VkPipelineStageFlags2 dst_stage_mask)
26 {
27    /* External dependencies don't matter here. */
28    if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
29        dep->dstSubpass == VK_SUBPASS_EXTERNAL)
30       return false;
31 
32    /* We can conceptually break down the process of rewriting a sysmem
33     * renderpass into a gmem one into two parts:
34     *
35     * 1. Split each draw and multisample resolve into N copies, one for each
36     * bin. (If using hardware binning, add one more copy where the FS is disabled
37     * for the binning pass). This is always allowed because the vertex stage
38     * is allowed to run an arbitrary number of times and there are no extra
39     * ordering constraints within a draw.
40     * 2. Take the last copy of the second-to-last draw and slide it down to
41     * before the last copy of the last draw. Repeat for each earlier draw
42     * until the draw pass for the last bin is complete, then repeat for each
43     * earlier bin until we finish with the first bin.
44     *
45     * During this rearranging process, we can't slide draws past each other in
46     * a way that breaks the subpass dependencies. For each draw, we must slide
47     * it past (copies of) the rest of the draws in the renderpass. We can
48     * slide a draw past another if there isn't a dependency between them, or
49     * if every dependency between them is between framebuffer-space stages
50     * only, with the BY_REGION bit set. Note that this includes
51     * self-dependencies, since these may result in pipeline barriers that also
52     * break the rearranging process.
53     */
54 
55    /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
56     * Region Dependencies":
57     */
58    const VkPipelineStageFlags2 framebuffer_space_stages =
59       VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
60       VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
61       VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
62       VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
63 
64    return
65       (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) ||
66       (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) ||
67       !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
68 }
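/* Illustrative example, not part of the driver logic: a dependency shaped
 * like the one sketched below reaches outside the framebuffer-space stages
 * (dstStageMask includes the vertex shader), so dep_invalid_for_gmem()
 * returns true and gmem rendering is disabled for the pass, whereas a
 * fragment-to-fragment dependency with VK_DEPENDENCY_BY_REGION_BIT set keeps
 * gmem rendering allowed.
 *
 *    VkSubpassDependency2 dep = {
 *       .sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
 *       .srcSubpass = 0,
 *       .dstSubpass = 1,
 *       .srcStageMask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
 *       .dstStageMask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
 *       .srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
 *       .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
 *       .dependencyFlags = 0,
 *    };
 */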
69 
70 static void
71 tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
72                                const VkSubpassDependency2 *dep)
73 {
74    uint32_t src = dep->srcSubpass;
75    uint32_t dst = dep->dstSubpass;
76 
77    /* Ignore subpass self-dependencies as they allow the app to call
78     * vkCmdPipelineBarrier() inside the render pass; the driver should only
79     * perform the barrier when that call is made, not when starting the render pass.
80     *
81     * We cannot decide whether to allow gmem rendering before a barrier
82     * is actually emitted, so we delay the decision until then.
83     */
84    if (src == dst)
85       return;
86 
87    /* From the Vulkan 1.2.195 spec:
88     *
89     * "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
90     *  dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
91     *  and access scopes instead are defined by the parameters of VkMemoryBarrier2."
92     */
93    const VkMemoryBarrier2 *barrier =
94       vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
95    VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
96    VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
97    VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
98    VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;
99 
100    if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) {
101       perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
102       for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
103          pass->gmem_pixels[i] = 0;
104    }
105 
106    struct tu_subpass_barrier *dst_barrier;
107    if (dst == VK_SUBPASS_EXTERNAL) {
108       dst_barrier = &pass->end_barrier;
109    } else {
110       dst_barrier = &pass->subpasses[dst].start_barrier;
111    }
112 
113    dst_barrier->src_stage_mask |= src_stage_mask;
114    dst_barrier->dst_stage_mask |= dst_stage_mask;
115    dst_barrier->src_access_mask |= src_access_mask;
116    dst_barrier->dst_access_mask |= dst_access_mask;
117 }
118 
119 /* We currently only care about undefined layouts, because we have to
120  * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
121  * UNDEFINED for anything not linear tiled, but we don't know yet whether the
122  * images used are tiled, so just assume they are.
123  */
124 
125 static bool
126 layout_undefined(VkImageLayout layout)
127 {
128    return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
129           layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
130 }
131 
132 /* This implements the following bit of spec text:
133  *
134  *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
135  *    first subpass that uses an attachment, then an implicit subpass
136  *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
137  *    used in. The implicit subpass dependency only exists if there
138  *    exists an automatic layout transition away from initialLayout.
139  *    The subpass dependency operates as if defined with the
140  *    following parameters:
141  *
142  *    VkSubpassDependency implicitDependency = {
143  *        .srcSubpass = VK_SUBPASS_EXTERNAL;
144  *        .dstSubpass = firstSubpass; // First subpass attachment is used in
145  *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
146  *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
147  *        .srcAccessMask = 0;
148  *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
149  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
150  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
151  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
152  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
153  *        .dependencyFlags = 0;
154  *    };
155  *
156  *    Similarly, if there is no subpass dependency from the last subpass
157  *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
158  *    subpass dependency exists from the last subpass it is used in to
159  *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
160  *    if there exists an automatic layout transition into finalLayout.
161  *    The subpass dependency operates as if defined with the following
162  *    parameters:
163  *
164  *    VkSubpassDependency implicitDependency = {
165  *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
166  *        .dstSubpass = VK_SUBPASS_EXTERNAL;
167  *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
168  *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
169  *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
170  *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
171  *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
172  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
173  *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
174  *        .dstAccessMask = 0;
175  *        .dependencyFlags = 0;
176  *    };
177  *
178  * Note: currently this is the only use we have for layout transitions,
179  * besides needing to invalidate CCU at the beginning, so we also flag
180  * transitions from UNDEFINED here.
181  */
182 static void
183 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
184                                  const VkRenderPassCreateInfo2 *info)
185 {
186    const VkAttachmentDescription2* att = info->pAttachments;
187    bool has_external_src[info->subpassCount];
188    bool has_external_dst[info->subpassCount];
189    bool att_used[pass->attachment_count];
190 
191    memset(has_external_src, 0, sizeof(has_external_src));
192    memset(has_external_dst, 0, sizeof(has_external_dst));
193 
194    for (uint32_t i = 0; i < info->dependencyCount; i++) {
195       uint32_t src = info->pDependencies[i].srcSubpass;
196       uint32_t dst = info->pDependencies[i].dstSubpass;
197 
198       if (src == dst)
199          continue;
200 
201       if (src == VK_SUBPASS_EXTERNAL)
202          has_external_src[dst] = true;
203       if (dst == VK_SUBPASS_EXTERNAL)
204          has_external_dst[src] = true;
205    }
206 
207    memset(att_used, 0, sizeof(att_used));
208 
209    for (unsigned i = 0; i < info->subpassCount; i++) {
210       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
211       bool src_implicit_dep = false;
212 
213       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
214          uint32_t a = subpass->pInputAttachments[j].attachment;
215 
216          if (a == VK_ATTACHMENT_UNUSED)
217             continue;
218 
219          uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
220                vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
221                VK_IMAGE_LAYOUT_UNDEFINED;
222          uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
223 
224          if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
225              stencil_initial_layout != stencil_layout) &&
226              !att_used[a] && !has_external_src[i])
227             src_implicit_dep = true;
228          att_used[a] = true;
229       }
230 
231       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
232          uint32_t a = subpass->pColorAttachments[j].attachment;
233          if (a == VK_ATTACHMENT_UNUSED)
234             continue;
235          if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
236              !att_used[a] && !has_external_src[i])
237             src_implicit_dep = true;
238          att_used[a] = true;
239       }
240 
241       if (subpass->pDepthStencilAttachment &&
242           subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
243          uint32_t a = subpass->pDepthStencilAttachment->attachment;
244          uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
245          uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
246 
247          if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
248              stencil_initial_layout != stencil_layout) &&
249              !att_used[a] && !has_external_src[i]) {
250             src_implicit_dep = true;
251          }
252          att_used[a] = true;
253       }
254 
255       if (subpass->pResolveAttachments) {
256          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
257             uint32_t a = subpass->pResolveAttachments[j].attachment;
258             if (a == VK_ATTACHMENT_UNUSED)
259                continue;
260             if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
261                !att_used[a] && !has_external_src[i])
262                src_implicit_dep = true;
263             att_used[a] = true;
264          }
265       }
266 
267       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
268          vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
269 
270       if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
271           ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
272             uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
273             uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
274             uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
275 
276             if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
277                 stencil_initial_layout != stencil_layout) &&
278                 !att_used[a] && !has_external_src[i])
279                src_implicit_dep = true;
280             att_used[a] = true;
281       }
282 
283       if (src_implicit_dep) {
284          tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
285             .srcSubpass = VK_SUBPASS_EXTERNAL,
286             .dstSubpass = i,
287             .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
288             .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
289             .srcAccessMask = 0,
290             .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
291                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
292                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
293                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
294                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
295             .dependencyFlags = 0,
296          });
297       }
298    }
299 
300    memset(att_used, 0, sizeof(att_used));
301 
302    for (int i = info->subpassCount - 1; i >= 0; i--) {
303       const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
304       bool dst_implicit_dep = false;
305 
306       for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
307          uint32_t a = subpass->pInputAttachments[j].attachment;
308          if (a == VK_ATTACHMENT_UNUSED)
309             continue;
310 
311          uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
312                vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
313                VK_IMAGE_LAYOUT_UNDEFINED;
314          uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
315 
316          if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
317              stencil_final_layout != stencil_layout) &&
318              !att_used[a] && !has_external_dst[i])
319             dst_implicit_dep = true;
320          att_used[a] = true;
321       }
322 
323       for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
324          uint32_t a = subpass->pColorAttachments[j].attachment;
325          if (a == VK_ATTACHMENT_UNUSED)
326             continue;
327          if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
328              !att_used[a] && !has_external_dst[i])
329             dst_implicit_dep = true;
330          att_used[a] = true;
331       }
332 
333       if (subpass->pDepthStencilAttachment &&
334           subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
335          uint32_t a = subpass->pDepthStencilAttachment->attachment;
336          uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
337          uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
338 
339          if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
340              stencil_final_layout != stencil_layout) &&
341              !att_used[a] && !has_external_dst[i]) {
342             dst_implicit_dep = true;
343          }
344          att_used[a] = true;
345       }
346 
347       if (subpass->pResolveAttachments) {
348          for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
349             uint32_t a = subpass->pResolveAttachments[j].attachment;
350             if (a == VK_ATTACHMENT_UNUSED)
351                continue;
352             if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
353                 !att_used[a] && !has_external_dst[i])
354                dst_implicit_dep = true;
355             att_used[a] = true;
356          }
357       }
358 
359       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
360          vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
361 
362       if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
363           ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
364             uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
365             uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
366             uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
367 
368             if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
369                 stencil_final_layout != stencil_layout) &&
370                 !att_used[a] && !has_external_dst[i])
371                dst_implicit_dep = true;
372             att_used[a] = true;
373       }
374 
375       if (dst_implicit_dep) {
376          tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
377             .srcSubpass = i,
378             .dstSubpass = VK_SUBPASS_EXTERNAL,
379             .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
380             .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
381             .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
382                              VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
383                              VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
384                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
385                              VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
386             .dstAccessMask = 0,
387             .dependencyFlags = 0,
388          });
389       }
390    }
391 
392    /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
393     * Assume that if an attachment has an initial layout of UNDEFINED, it gets
394     * transitioned eventually.
395     */
396    for (unsigned i = 0; i < info->attachmentCount; i++) {
397       if (layout_undefined(att[i].initialLayout)) {
398          if (vk_format_is_depth_or_stencil(att[i].format)) {
399             pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
400          } else {
401             pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
402          }
403       }
404    }
405 }
406 
407 /* If an input attachment is used without an intervening write to the same
408  * attachment, then we can just use the original image, even in GMEM mode.
409  * This is an optimization, but it's also important because it allows us to
410  * avoid having to invalidate UCHE at the beginning of each tile due to it
411  * becoming invalid. The only reads of GMEM via UCHE should be after an
412  * earlier subpass modified it, which only works if there's already an
413  * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
414  * don't consider this in the dependency code, so this is also required for
415  * correctness.
416  */
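/* For example, if an attachment is written as a color attachment in subpass 0
 * and then read as an input attachment in subpass 1, patch_input_gmem is set
 * so the input read goes through GMEM. If it is only ever read as an input
 * attachment, it is read directly from the image even when the rest of the
 * pass renders to GMEM.
 */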
417 static void
418 tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
419 {
420    bool written[pass->attachment_count];
421 
422    memset(written, 0, sizeof(written));
423 
424    for (unsigned i = 0; i < pass->subpass_count; i++) {
425       struct tu_subpass *subpass = &pass->subpasses[i];
426 
427       for (unsigned j = 0; j < subpass->input_count; j++) {
428          uint32_t a = subpass->input_attachments[j].attachment;
429          if (a == VK_ATTACHMENT_UNUSED)
430             continue;
431          subpass->input_attachments[j].patch_input_gmem = written[a];
432       }
433 
434       for (unsigned j = 0; j < subpass->color_count; j++) {
435          uint32_t a = subpass->color_attachments[j].attachment;
436          if (a == VK_ATTACHMENT_UNUSED)
437             continue;
438          written[a] = true;
439 
440          for (unsigned k = 0; k < subpass->input_count; k++) {
441             if (subpass->input_attachments[k].attachment == a &&
442                 !subpass->input_attachments[k].patch_input_gmem) {
443                /* For render feedback loops, we have no idea whether the use
444                 * as a color attachment or input attachment will come first,
445                 * so we have to always use GMEM in case the color attachment
446                 * comes first and defensively invalidate UCHE in case the
447                 * input attachment comes first.
448                 */
449                subpass->feedback_invalidate = true;
450                subpass->input_attachments[k].patch_input_gmem = true;
451             }
452          }
453       }
454 
455       for (unsigned j = 0; j < subpass->resolve_count; j++) {
456          uint32_t a = subpass->resolve_attachments[j].attachment;
457          if (a == VK_ATTACHMENT_UNUSED)
458             continue;
459          written[a] = true;
460       }
461 
462       if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
463          written[subpass->depth_stencil_attachment.attachment] = true;
464          for (unsigned k = 0; k < subpass->input_count; k++) {
465             if (subpass->input_attachments[k].attachment ==
466                 subpass->depth_stencil_attachment.attachment &&
467                 !subpass->input_attachments[k].patch_input_gmem) {
468                subpass->feedback_invalidate = true;
469                subpass->input_attachments[k].patch_input_gmem = true;
470             }
471          }
472       }
473    }
474 }
475 
476 static void
477 tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
478 {
479    for (unsigned i = 0; i < pass->subpass_count; i++) {
480       struct tu_subpass *subpass = &pass->subpasses[i];
481 
482       for (unsigned j = 0; j < subpass->color_count; j++) {
483          uint32_t a = subpass->color_attachments[j].attachment;
484          if (a == VK_ATTACHMENT_UNUSED)
485             continue;
486          for (unsigned k = 0; k < subpass->input_count; k++) {
487             if (subpass->input_attachments[k].attachment == a) {
488                subpass->feedback_loop_color = true;
489                break;
490             }
491          }
492       }
493 
494       if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
495          for (unsigned k = 0; k < subpass->input_count; k++) {
496             if (subpass->input_attachments[k].attachment ==
497                 subpass->depth_stencil_attachment.attachment) {
498                subpass->feedback_loop_ds = true;
499                break;
500             }
501          }
502       }
503    }
504 }
505 
506 static void update_samples(struct tu_subpass *subpass,
507                            VkSampleCountFlagBits samples)
508 {
509    assert(subpass->samples == 0 || subpass->samples == samples);
510    subpass->samples = samples;
511 }
512 
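/* A sketch of the intent (the consumers live in the command-buffer code): an
 * attachment's GMEM load may only be made conditional when it is a plain
 * LOAD_OP_LOAD with no clear aspect and no pending resolve of the attachment,
 * and its GMEM store may only be made conditional when it is a plain
 * STORE_OP_STORE with no clear aspect, which is exactly what is computed
 * below.
 */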
513 static void
514 tu_render_pass_cond_config(struct tu_render_pass *pass)
515 {
516    for (uint32_t i = 0; i < pass->attachment_count; i++) {
517       struct tu_render_pass_attachment *att = &pass->attachments[i];
518 
519       att->cond_load_allowed =
520          (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
521       att->cond_store_allowed =
522          (att->store || att->store_stencil) && !att->clear_mask;
523    }
524 }
525 
526 static void
527 tu_render_pass_gmem_config(struct tu_render_pass *pass,
528                            const struct tu_physical_device *phys_dev)
529 {
530    for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
531         layout++) {
532       /* From the VK_KHR_multiview spec:
533        *
534        *    Multiview is all-or-nothing for a render pass - that is, either all
535        *    subpasses must have a non-zero view mask (though some subpasses may
536        *    have only one view) or all must be zero.
537        *
538        * This means we only have to check one of the view masks.
539        */
540       if (pass->subpasses[0].multiview_mask) {
541          /* It seems multiview must use sysmem rendering. */
542          pass->gmem_pixels[layout] = 0;
543          continue;
544       }
545 
546       /* log2(gmem_align/(tile_align_w*tile_align_h)) */
547       uint32_t block_align_shift = 3;
548       uint32_t tile_align_w = phys_dev->info->tile_align_w;
549       uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
550                             phys_dev->info->tile_align_h;
551 
552       /* calculate total bytes per pixel */
553       uint32_t cpp_total = 0;
554       for (uint32_t i = 0; i < pass->attachment_count; i++) {
555          struct tu_render_pass_attachment *att = &pass->attachments[i];
556          bool cpp1 = (att->cpp == 1);
557          if (att->gmem) {
558             cpp_total += att->cpp;
559 
560             /* take into account the separate stencil: */
561             if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
562                cpp1 = (att->samples == 1);
563                cpp_total += att->samples;
564             }
565 
566             /* texture pitch must be aligned to 64, use a tile_align_w that is
567              * a multiple of 64 for cpp==1 attachment to work as input
568              * attachment
569              */
570             if (cpp1 && tile_align_w % 64 != 0) {
571                tile_align_w *= 2;
572                block_align_shift -= 1;
573             }
574          }
575       }
576 
577       pass->tile_align_w = tile_align_w;
578 
579       /* no gmem attachments */
580       if (cpp_total == 0) {
581          /* any non-zero value so the tiling config works with no
582           * attachments
583           */
584          pass->gmem_pixels[layout] = 1024 * 1024;
585          continue;
586       }
587 
588       /* TODO: this algorithm isn't optimal
589        * for example, two attachments with cpp = {1, 4}
590        * result:  nblocks = {12, 52}, pixels = 196608
591        * optimal: nblocks = {13, 51}, pixels = 208896
592        */
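      /* A worked instance of the numbers above, assuming gmem_blocks = 64 and
       * gmem_align = 16384: with cpp = {1, 4}, cpp_total = 5, the first
       * attachment gets (64 * 1) / 5 = 12 blocks, leaving 52 blocks and
       * cpp_total = 4, so the second gets (52 * 4) / 4 = 52 blocks.
       * pixels = MIN2(12 * 16384 / 1, 52 * 16384 / 4) = 196608, whereas the
       * {13, 51} split would give MIN2(212992, 208896) = 208896.
       */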
593       uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
594                               ? phys_dev->gmem_size
595                               : phys_dev->ccu_offset_gmem;
596       uint32_t gmem_blocks = gmem_size / gmem_align;
597       uint32_t offset = 0, pixels = ~0u, i;
598       for (i = 0; i < pass->attachment_count; i++) {
599          struct tu_render_pass_attachment *att = &pass->attachments[i];
600          if (!att->gmem)
601             continue;
602 
603          att->gmem_offset[layout] = offset;
604 
605          uint32_t align = MAX2(1, att->cpp >> block_align_shift);
606          uint32_t nblocks =
607             MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);
608 
609          if (nblocks > gmem_blocks)
610             break;
611 
612          gmem_blocks -= nblocks;
613          cpp_total -= att->cpp;
614          offset += nblocks * gmem_align;
615          pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);
616 
617          /* repeat the same for separate stencil */
618          if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
619             att->gmem_offset_stencil[layout] = offset;
620 
621             /* note: for s8_uint, block align is always 1 */
622             uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
623             if (nblocks > gmem_blocks)
624                break;
625 
626             gmem_blocks -= nblocks;
627             cpp_total -= att->samples;
628             offset += nblocks * gmem_align;
629             pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
630          }
631       }
632 
633       /* if the loop didn't complete then the gmem config is impossible */
634       if (i == pass->attachment_count)
635          pass->gmem_pixels[layout] = pixels;
636    }
637 }
638 
639 static void
640 tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
641 {
642    for (uint32_t i = 0; i < pass->attachment_count; i++) {
643       const struct tu_render_pass_attachment *att = &pass->attachments[i];
644 
645       /* approximate tu_load_gmem_attachment */
646       if (att->load)
647          pass->gmem_bandwidth_per_pixel += att->cpp;
648 
649       /* approximate tu_store_gmem_attachment */
650       if (att->store)
651          pass->gmem_bandwidth_per_pixel += att->cpp;
652 
653       /* approximate tu_clear_sysmem_attachment */
654       if (att->clear_mask)
655          pass->sysmem_bandwidth_per_pixel += att->cpp;
656 
657       /* approximate tu6_emit_sysmem_resolves */
658       if (att->will_be_resolved) {
659          pass->sysmem_bandwidth_per_pixel +=
660             att->cpp + att->cpp / att->samples;
661       }
662    }
663 }
664 
665 static void
666 attachment_set_ops(struct tu_device *device,
667                    struct tu_render_pass_attachment *att,
668                    VkAttachmentLoadOp load_op,
669                    VkAttachmentLoadOp stencil_load_op,
670                    VkAttachmentStoreOp store_op,
671                    VkAttachmentStoreOp stencil_store_op)
672 {
673    if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) {
674       if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
675          load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
676       if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
677          stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
678    }
679 
680    /* load/store ops */
681    att->clear_mask =
682       (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
683    att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
684    att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
685 
686    bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
687    bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
688    bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
689 
690    switch (att->format) {
691    case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
692       if (att->clear_mask)
693          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
694       if (stencil_clear)
695          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
696       if (stencil_load)
697          att->load = true;
698       if (stencil_store)
699          att->store = true;
700       break;
701    case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
702       att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
703       att->load = stencil_load;
704       att->store = stencil_store;
705       break;
706    case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
707       if (att->clear_mask)
708          att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
709       if (stencil_clear)
710          att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
711       if (stencil_load)
712          att->load_stencil = true;
713       if (stencil_store)
714          att->store_stencil = true;
715       break;
716    default:
717       break;
718    }
719 }
720 
721 static bool
722 is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
723 {
724    if (depth_stencil_resolve &&
725        depth_stencil_resolve->pDepthStencilResolveAttachment &&
726        depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
727       return true;
728    }
729    return false;
730 }
731 
732 static void
733 tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
734 {
735    struct tu_subpass *subpass = &pass->subpasses[i];
736 
737    pass->attachments[a].gmem = true;
738    update_samples(subpass, pCreateInfo->pAttachments[a].samples);
739    pass->attachments[a].clear_views |= subpass->multiview_mask;
740 }
741 
742 VKAPI_ATTR VkResult VKAPI_CALL
743 tu_CreateRenderPass2(VkDevice _device,
744                      const VkRenderPassCreateInfo2 *pCreateInfo,
745                      const VkAllocationCallbacks *pAllocator,
746                      VkRenderPass *pRenderPass)
747 {
748    TU_FROM_HANDLE(tu_device, device, _device);
749 
750    if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
751       return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
752                                          pRenderPass);
753 
754    struct tu_render_pass *pass;
755    size_t size;
756    size_t attachments_offset;
757 
758    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
759 
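   /* Everything lives in one allocation: the tu_render_pass struct with its
    * trailing subpasses array (subpassCount entries), followed by the
    * attachments array (attachmentCount entries) at attachments_offset.
    */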
760    size = sizeof(*pass);
761    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
762    attachments_offset = size;
763    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
764 
765    pass = vk_object_zalloc(&device->vk, pAllocator, size,
766                            VK_OBJECT_TYPE_RENDER_PASS);
767    if (pass == NULL)
768       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
769 
770    pass->attachment_count = pCreateInfo->attachmentCount;
771    pass->subpass_count = pCreateInfo->subpassCount;
772    pass->attachments = (void *) pass + attachments_offset;
773 
774    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
775       struct tu_render_pass_attachment *att = &pass->attachments[i];
776 
777       att->format = pCreateInfo->pAttachments[i].format;
778       att->samples = pCreateInfo->pAttachments[i].samples;
779       /* for d32s8, cpp is for the depth image, and
780        * att->samples will be used as the cpp for the stencil image
781        */
782       if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
783          att->cpp = 4 * att->samples;
784       else
785          att->cpp = vk_format_get_blocksize(att->format) * att->samples;
786       /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
787       att->gmem = false;
788 
789       VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
790       VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;
791 
792       attachment_set_ops(device, att, loadOp, stencilLoadOp,
793                          pCreateInfo->pAttachments[i].storeOp,
794                          pCreateInfo->pAttachments[i].stencilStoreOp);
795    }
796    uint32_t subpass_attachment_count = 0;
797    struct tu_subpass_attachment *p;
798    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
799       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
800       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
801          vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
802 
803       subpass_attachment_count +=
804          desc->inputAttachmentCount + desc->colorAttachmentCount +
805          (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
806          (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
807    }
808 
809    if (subpass_attachment_count) {
810       pass->subpass_attachments = vk_alloc2(
811          &device->vk.alloc, pAllocator,
812          subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
813          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
814       if (pass->subpass_attachments == NULL) {
815          vk_object_free(&device->vk, pAllocator, pass);
816          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
817       }
818    } else
819       pass->subpass_attachments = NULL;
820 
821    p = pass->subpass_attachments;
822    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
823       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
824       const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
825          vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
826       struct tu_subpass *subpass = &pass->subpasses[i];
827 
828       subpass->input_count = desc->inputAttachmentCount;
829       subpass->color_count = desc->colorAttachmentCount;
830       subpass->resolve_count = 0;
831       subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
832       subpass->samples = 0;
833       subpass->srgb_cntl = 0;
834 
835       const VkSubpassDescriptionFlagBits raster_order_access_bits =
836          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM |
837          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
838          VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM;
839 
840       subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits;
841 
842       subpass->multiview_mask = desc->viewMask;
843 
844       if (desc->inputAttachmentCount > 0) {
845          subpass->input_attachments = p;
846          p += desc->inputAttachmentCount;
847 
848          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
849             uint32_t a = desc->pInputAttachments[j].attachment;
850             subpass->input_attachments[j].attachment = a;
851             /* Note: attachments only used as input attachments will be read
852              * directly instead of through gmem, so we don't mark input
853              * attachments as needing gmem.
854              */
855          }
856       }
857 
858       if (desc->colorAttachmentCount > 0) {
859          subpass->color_attachments = p;
860          p += desc->colorAttachmentCount;
861 
862          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
863             uint32_t a = desc->pColorAttachments[j].attachment;
864             subpass->color_attachments[j].attachment = a;
865 
866             if (a != VK_ATTACHMENT_UNUSED) {
867                tu_subpass_use_attachment(pass, i, a, pCreateInfo);
868 
869                if (vk_format_is_srgb(pass->attachments[a].format))
870                   subpass->srgb_cntl |= 1 << j;
871             }
872          }
873       }
874 
875       subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
876       if (desc->pResolveAttachments) {
877          p += desc->colorAttachmentCount;
878          subpass->resolve_count += desc->colorAttachmentCount;
879          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
880             subpass->resolve_attachments[j].attachment =
881                   desc->pResolveAttachments[j].attachment;
882 
883             uint32_t src_a = desc->pColorAttachments[j].attachment;
884             if (src_a != VK_ATTACHMENT_UNUSED) {
885                pass->attachments[src_a].will_be_resolved =
886                   desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
887             }
888          }
889       }
890 
891       if (subpass->resolve_depth_stencil) {
892          p++;
893          subpass->resolve_count++;
894          uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
895          subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
896 
897          uint32_t src_a = desc->pDepthStencilAttachment->attachment;
898          if (src_a != VK_ATTACHMENT_UNUSED) {
899             pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
900          }
901       }
902 
903       uint32_t a = desc->pDepthStencilAttachment ?
904          desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
905       subpass->depth_stencil_attachment.attachment = a;
906       if (a != VK_ATTACHMENT_UNUSED)
907          tu_subpass_use_attachment(pass, i, a, pCreateInfo);
908    }
909 
910    tu_render_pass_patch_input_gmem(pass);
911 
912    tu_render_pass_check_feedback_loop(pass);
913 
914    /* disable unused attachments */
915    for (uint32_t i = 0; i < pass->attachment_count; i++) {
916       struct tu_render_pass_attachment *att = &pass->attachments[i];
917       if (!att->gmem) {
918          att->clear_mask = 0;
919          att->load = false;
920       }
921    }
922 
923    tu_render_pass_cond_config(pass);
924    tu_render_pass_gmem_config(pass, device->physical_device);
925    tu_render_pass_bandwidth_config(pass);
926 
927    for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
928       tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
929    }
930 
931    tu_render_pass_add_implicit_deps(pass, pCreateInfo);
932 
933    *pRenderPass = tu_render_pass_to_handle(pass);
934 
935    return VK_SUCCESS;
936 }
937 
938 VKAPI_ATTR void VKAPI_CALL
939 tu_DestroyRenderPass(VkDevice _device,
940                      VkRenderPass _pass,
941                      const VkAllocationCallbacks *pAllocator)
942 {
943    TU_FROM_HANDLE(tu_device, device, _device);
944 
945    if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
946       vk_common_DestroyRenderPass(_device, _pass, pAllocator);
947       return;
948    }
949 
950    TU_FROM_HANDLE(tu_render_pass, pass, _pass);
951 
952    if (!_pass)
953       return;
954 
955    vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
956    vk_object_free(&device->vk, pAllocator, pass);
957 }
958 
959 static void
960 tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
961                             struct tu_image_view *view)
962 {
963    att->format = view->vk.format;
964    att->samples = view->image->layout->nr_samples;
965 
966    /* for d32s8, cpp is for the depth image, and
967     * att->samples will be used as the cpp for the stencil image
968     */
969    if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
970       att->cpp = 4 * att->samples;
971    else
972       att->cpp = vk_format_get_blocksize(att->format) * att->samples;
973 }
974 
975 void
976 tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
977                              const VkRenderingInfo *info)
978 {
979    struct tu_device *device = cmd_buffer->device;
980    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
981    struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
982 
983    pass->subpass_count = 1;
984    pass->attachments = cmd_buffer->dynamic_rp_attachments;
985 
986    subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
987    subpass->resolve_depth_stencil = false;
988    subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
989    subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
990    subpass->feedback_invalidate = false;
991    subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
992    subpass->input_count = 0;
993    subpass->samples = 0;
994    subpass->srgb_cntl = 0;
995    subpass->raster_order_attachment_access = false;
996    subpass->multiview_mask = info->viewMask;
997 
998    uint32_t a = 0;
999    for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
1000       struct tu_render_pass_attachment *att = &pass->attachments[a];
1001       const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];
1002 
1003       if (att_info->imageView == VK_NULL_HANDLE) {
1004          subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1005          subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1006          continue;
1007       }
1008 
1009       TU_FROM_HANDLE(tu_image_view, view, att_info->imageView);
1010       tu_setup_dynamic_attachment(att, view);
1011       att->gmem = true;
1012       att->clear_views = info->viewMask;
1013       attachment_set_ops(device, att, att_info->loadOp, 0,
1014                          att_info->storeOp, 0);
1015       subpass->color_attachments[i].attachment = a++;
1016 
1017       subpass->samples = view->image->layout->nr_samples;
1018 
1019       if (vk_format_is_srgb(view->vk.format))
1020          subpass->srgb_cntl |= 1 << i;
1021 
1022       if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1023          struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1024          TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
1025          tu_setup_dynamic_attachment(resolve_att, resolve_view);
1026          resolve_att->gmem = false;
1027          attachment_set_ops(device, resolve_att,
1028                             VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0,
1029                             VK_ATTACHMENT_STORE_OP_STORE, 0);
1030          subpass->resolve_attachments[i].attachment = a++;
1031          att->will_be_resolved = true;
1032       } else {
1033          subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1034          att->will_be_resolved = false;
1035       }
1036    }
1037 
1038    if (info->pDepthAttachment || info->pStencilAttachment) {
1039       const struct VkRenderingAttachmentInfo *common_info =
1040          (info->pDepthAttachment &&
1041           info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
1042          info->pDepthAttachment :
1043          info->pStencilAttachment;
1044 
1045       if (common_info && common_info->imageView != VK_NULL_HANDLE) {
1046          TU_FROM_HANDLE(tu_image_view, view, common_info->imageView);
1047 
1048          struct tu_render_pass_attachment *att = &pass->attachments[a];
1049          tu_setup_dynamic_attachment(att, view);
1050          att->gmem = true;
1051          att->clear_views = info->viewMask;
1052          subpass->depth_stencil_attachment.attachment = a++;
1053 
1054          attachment_set_ops(device, att,
1055                             info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0,
1056                             info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0,
1057                             info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0,
1058                             info->pStencilAttachment ? info->pStencilAttachment->storeOp : 0);
1059 
1060          subpass->samples = view->image->layout->nr_samples;
1061 
1062          if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
1063             unsigned i = subpass->resolve_count++;
1064             struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
1065             TU_FROM_HANDLE(tu_image_view, resolve_view,
1066                            common_info->resolveImageView);
1067             tu_setup_dynamic_attachment(resolve_att, resolve_view);
1068             resolve_att->gmem = false;
1069             attachment_set_ops(device, resolve_att,
1070                                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1071                                VK_ATTACHMENT_LOAD_OP_DONT_CARE,
1072                                VK_ATTACHMENT_STORE_OP_STORE,
1073                                VK_ATTACHMENT_STORE_OP_STORE);
1074             subpass->resolve_attachments[i].attachment = a++;
1075             att->will_be_resolved = true;
1076             subpass->resolve_depth_stencil = true;
1077          } else {
1078             att->will_be_resolved = false;
1079          }
1080       } else {
1081          subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1082       }
1083    } else {
1084       subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1085    }
1086 
1087    pass->attachment_count = a;
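   /* With the loops above, pass->attachments now holds, in order: each used
    * color attachment immediately followed by its resolve attachment (if
    * any), then the depth/stencil attachment and, last, its resolve
    * attachment (if any).
    */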
1088 
1089    tu_render_pass_cond_config(pass);
1090    tu_render_pass_gmem_config(pass, device->physical_device);
1091    tu_render_pass_bandwidth_config(pass);
1092 }
1093 
1094 void
1095 tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1096                              const VkCommandBufferInheritanceRenderingInfo *info)
1097 {
1098    struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1099    struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1100 
1101    pass->subpass_count = 1;
1102    pass->attachments = cmd_buffer->dynamic_rp_attachments;
1103 
1104    subpass->color_count = info->colorAttachmentCount;
1105    subpass->resolve_count = 0;
1106    subpass->resolve_depth_stencil = false;
1107    subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1108    subpass->resolve_attachments = NULL;
1109    subpass->feedback_invalidate = false;
1110    subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1111    subpass->input_count = 0;
1112    subpass->samples = 0;
1113    subpass->srgb_cntl = 0;
1114    subpass->raster_order_attachment_access = false;
1115    subpass->multiview_mask = info->viewMask;
1116    subpass->samples = info->rasterizationSamples;
1117 
1118    unsigned a = 0;
1119    for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1120       struct tu_render_pass_attachment *att = &pass->attachments[a];
1121       VkFormat format = info->pColorAttachmentFormats[i];
1122 
1123       if (format == VK_FORMAT_UNDEFINED) {
1124          subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1125          continue;
1126       }
1127 
1128       att->format = format;
1129       att->samples = info->rasterizationSamples;
1130       subpass->samples = info->rasterizationSamples;
1131       subpass->color_attachments[i].attachment = a++;
1132 
1133       /* conservatively assume that the attachment may be conditionally
1134        * loaded/stored.
1135        */
1136       att->cond_load_allowed = att->cond_store_allowed = true;
1137    }
1138 
1139    if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1140        info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1141       struct tu_render_pass_attachment *att = &pass->attachments[a];
1142       att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1143          info->depthAttachmentFormat : info->stencilAttachmentFormat;
1144       att->samples = info->rasterizationSamples;
1145       subpass->depth_stencil_attachment.attachment = a++;
1146       att->cond_load_allowed = att->cond_store_allowed = true;
1147    } else {
1148       subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1149    }
1150 }
1151 
1152 VKAPI_ATTR void VKAPI_CALL
1153 tu_GetRenderAreaGranularity(VkDevice _device,
1154                             VkRenderPass renderPass,
1155                             VkExtent2D *pGranularity)
1156 {
1157    TU_FROM_HANDLE(tu_device, device, _device);
1158    pGranularity->width = device->physical_device->info->gmem_align_w;
1159    pGranularity->height = device->physical_device->info->gmem_align_h;
1160 }
1161 
1162 uint32_t
1163 tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1164 {
1165    if (subpass->resolve_depth_stencil &&
1166        index == (subpass->resolve_count - 1))
1167       return subpass->depth_stencil_attachment.attachment;
1168 
1169    return subpass->color_attachments[index].attachment;
1170 }
1171