1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 * SPDX-License-Identifier: MIT
5 *
6 * based in part on anv driver which is:
7 * Copyright © 2015 Intel Corporation
8 */
9
10 #include "tu_pass.h"
11
12 #include "vk_util.h"
13
14 #include "tu_cmd_buffer.h"
15 #include "tu_device.h"
16 #include "tu_image.h"
17
18 /* Return true if we have to fallback to sysmem rendering because the
19 * dependency can't be satisfied with tiled rendering.
20 */
21
22 static bool
dep_invalid_for_gmem(const VkSubpassDependency2 * dep,VkPipelineStageFlags2 src_stage_mask,VkPipelineStageFlags2 dst_stage_mask)23 dep_invalid_for_gmem(const VkSubpassDependency2 *dep,
24 VkPipelineStageFlags2 src_stage_mask,
25 VkPipelineStageFlags2 dst_stage_mask)
26 {
27 /* External dependencies don't matter here. */
28 if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
29 dep->dstSubpass == VK_SUBPASS_EXTERNAL)
30 return false;
31
32 /* We can conceptually break down the process of rewriting a sysmem
33 * renderpass into a gmem one into two parts:
34 *
35 * 1. Split each draw and multisample resolve into N copies, one for each
36 * bin. (If hardware binning, add one more copy where the FS is disabled
37 * for the binning pass). This is always allowed because the vertex stage
38 * is allowed to run an arbitrary number of times and there are no extra
39 * ordering constraints within a draw.
40 * 2. Take the last copy of the second-to-last draw and slide it down to
41 * before the last copy of the last draw. Repeat for each earlier draw
42 * until the draw pass for the last bin is complete, then repeat for each
43 * earlier bin until we finish with the first bin.
44 *
45 * During this rearranging process, we can't slide draws past each other in
46 * a way that breaks the subpass dependencies. For each draw, we must slide
47 * it past (copies of) the rest of the draws in the renderpass. We can
48 * slide a draw past another if there isn't a dependency between them, or
49 * if the dependenc(ies) are dependencies between framebuffer-space stages
50 * only with the BY_REGION bit set. Note that this includes
51 * self-dependencies, since these may result in pipeline barriers that also
52 * break the rearranging process.
53 */
54
55 /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
56 * Region Dependencies":
57 */
58 const VkPipelineStageFlags2 framebuffer_space_stages =
59 VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT |
60 VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
61 VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
62 VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
63
64 return
65 (src_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) ||
66 (dst_stage_mask & ~(framebuffer_space_stages | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)) ||
67 !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
68 }
69
/* Accumulate one VkSubpassDependency2 into the destination subpass's start
 * barrier (or the render pass's end barrier for dstSubpass == EXTERNAL),
 * and disable gmem rendering if the dependency cannot be honored while
 * tiling.
 */
static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   /* Ignore subpass self-dependencies as they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass and the driver should only
    * do the barrier when called, not when starting the render pass.
    *
    * We cannot decide whether to allow gmem rendering before a barrier
    * is actually emitted, so we delay the decision until then.
    */
   if (src == dst)
      return;

   /* From the Vulkan 1.2.195 spec:
    *
    *    "If an instance of VkMemoryBarrier2 is included in the pNext chain, srcStageMask,
    *     dstStageMask, srcAccessMask, and dstAccessMask parameters are ignored. The synchronization
    *     and access scopes instead are defined by the parameters of VkMemoryBarrier2."
    */
   const VkMemoryBarrier2 *barrier =
      vk_find_struct_const(dep->pNext, MEMORY_BARRIER_2);
   VkPipelineStageFlags2 src_stage_mask = barrier ? barrier->srcStageMask : dep->srcStageMask;
   VkAccessFlags2 src_access_mask = barrier ? barrier->srcAccessMask : dep->srcAccessMask;
   VkPipelineStageFlags2 dst_stage_mask = barrier ? barrier->dstStageMask : dep->dstStageMask;
   VkAccessFlags2 dst_access_mask = barrier ? barrier->dstAccessMask : dep->dstAccessMask;

   /* A single non-framebuffer-local dependency forces the whole pass to
    * sysmem, for every gmem layout.
    */
   if (dep_invalid_for_gmem(dep, src_stage_mask, dst_stage_mask)) {
      perf_debug((struct tu_device *)pass->base.device, "Disabling gmem rendering due to invalid subpass dependency");
      for (int i = 0; i < ARRAY_SIZE(pass->gmem_pixels); i++)
         pass->gmem_pixels[i] = 0;
   }

   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

   /* Dependencies with the same destination are merged by OR-ing the
    * stage/access scopes together.
    */
   dst_barrier->src_stage_mask |= src_stage_mask;
   dst_barrier->dst_stage_mask |= dst_stage_mask;
   dst_barrier->src_access_mask |= src_access_mask;
   dst_barrier->dst_access_mask |= dst_access_mask;
}
118
119 /* We currently only care about undefined layouts, because we have to
120 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
121 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
122 * images used are tiled, so just assume they are.
123 */
124
125 static bool
layout_undefined(VkImageLayout layout)126 layout_undefined(VkImageLayout layout)
127 {
128 return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
129 layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
130 }
131
132 /* This implements the following bit of spec text:
133 *
134 * If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
135 * first subpass that uses an attachment, then an implicit subpass
136 * dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
137 * used in. The implicit subpass dependency only exists if there
138 * exists an automatic layout transition away from initialLayout.
139 * The subpass dependency operates as if defined with the
140 * following parameters:
141 *
142 * VkSubpassDependency implicitDependency = {
143 * .srcSubpass = VK_SUBPASS_EXTERNAL;
144 * .dstSubpass = firstSubpass; // First subpass attachment is used in
145 * .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
146 * .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
147 * .srcAccessMask = 0;
148 * .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
149 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
150 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
151 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
152 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
153 * .dependencyFlags = 0;
154 * };
155 *
156 * Similarly, if there is no subpass dependency from the last subpass
157 * that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
158 * subpass dependency exists from the last subpass it is used in to
159 * VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
160 * if there exists an automatic layout transition into finalLayout.
161 * The subpass dependency operates as if defined with the following
162 * parameters:
163 *
164 * VkSubpassDependency implicitDependency = {
165 * .srcSubpass = lastSubpass; // Last subpass attachment is used in
166 * .dstSubpass = VK_SUBPASS_EXTERNAL;
167 * .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
168 * .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
169 * .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
170 * VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
171 * VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
172 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
173 * VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
174 * .dstAccessMask = 0;
175 * .dependencyFlags = 0;
176 * };
177 *
178 * Note: currently this is the only use we have for layout transitions,
179 * besides needing to invalidate CCU at the beginning, so we also flag
180 * transitions from UNDEFINED here.
181 */
182 static void
tu_render_pass_add_implicit_deps(struct tu_render_pass * pass,const VkRenderPassCreateInfo2 * info)183 tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
184 const VkRenderPassCreateInfo2 *info)
185 {
186 const VkAttachmentDescription2* att = info->pAttachments;
187 bool has_external_src[info->subpassCount];
188 bool has_external_dst[info->subpassCount];
189 bool att_used[pass->attachment_count];
190
191 memset(has_external_src, 0, sizeof(has_external_src));
192 memset(has_external_dst, 0, sizeof(has_external_dst));
193
194 for (uint32_t i = 0; i < info->dependencyCount; i++) {
195 uint32_t src = info->pDependencies[i].srcSubpass;
196 uint32_t dst = info->pDependencies[i].dstSubpass;
197
198 if (src == dst)
199 continue;
200
201 if (src == VK_SUBPASS_EXTERNAL)
202 has_external_src[dst] = true;
203 if (dst == VK_SUBPASS_EXTERNAL)
204 has_external_dst[src] = true;
205 }
206
207 memset(att_used, 0, sizeof(att_used));
208
209 for (unsigned i = 0; i < info->subpassCount; i++) {
210 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
211 bool src_implicit_dep = false;
212
213 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
214 uint32_t a = subpass->pInputAttachments[j].attachment;
215
216 if (a == VK_ATTACHMENT_UNUSED)
217 continue;
218
219 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
220 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
221 VK_IMAGE_LAYOUT_UNDEFINED;
222 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
223
224 if ((att[a].initialLayout != subpass->pInputAttachments[j].layout ||
225 stencil_initial_layout != stencil_layout) &&
226 !att_used[a] && !has_external_src[i])
227 src_implicit_dep = true;
228 att_used[a] = true;
229 }
230
231 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
232 uint32_t a = subpass->pColorAttachments[j].attachment;
233 if (a == VK_ATTACHMENT_UNUSED)
234 continue;
235 if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
236 !att_used[a] && !has_external_src[i])
237 src_implicit_dep = true;
238 att_used[a] = true;
239 }
240
241 if (subpass->pDepthStencilAttachment &&
242 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
243 uint32_t a = subpass->pDepthStencilAttachment->attachment;
244 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
245 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
246
247 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
248 stencil_initial_layout != stencil_layout) &&
249 !att_used[a] && !has_external_src[i]) {
250 src_implicit_dep = true;
251 }
252 att_used[a] = true;
253 }
254
255 if (subpass->pResolveAttachments) {
256 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
257 uint32_t a = subpass->pResolveAttachments[j].attachment;
258 if (a == VK_ATTACHMENT_UNUSED)
259 continue;
260 if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
261 !att_used[a] && !has_external_src[i])
262 src_implicit_dep = true;
263 att_used[a] = true;
264 }
265 }
266
267 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
268 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
269
270 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
271 ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
272 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
273 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
274 uint32_t stencil_initial_layout = vk_att_desc_stencil_layout(&att[a], false);
275
276 if ((att[a].initialLayout != subpass->pDepthStencilAttachment->layout ||
277 stencil_initial_layout != stencil_layout) &&
278 !att_used[a] && !has_external_src[i])
279 src_implicit_dep = true;
280 att_used[a] = true;
281 }
282
283 if (src_implicit_dep) {
284 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
285 .srcSubpass = VK_SUBPASS_EXTERNAL,
286 .dstSubpass = i,
287 .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
288 .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
289 .srcAccessMask = 0,
290 .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
291 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
292 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
293 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
294 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
295 .dependencyFlags = 0,
296 });
297 }
298 }
299
300 memset(att_used, 0, sizeof(att_used));
301
302 for (int i = info->subpassCount - 1; i >= 0; i--) {
303 const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
304 bool dst_implicit_dep = false;
305
306 for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
307 uint32_t a = subpass->pInputAttachments[j].attachment;
308 if (a == VK_ATTACHMENT_UNUSED)
309 continue;
310
311 uint32_t stencil_layout = vk_format_has_stencil(att[a].format) ?
312 vk_att_ref_stencil_layout(&subpass->pInputAttachments[j], att) :
313 VK_IMAGE_LAYOUT_UNDEFINED;
314 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
315
316 if ((att[a].finalLayout != subpass->pInputAttachments[j].layout ||
317 stencil_final_layout != stencil_layout) &&
318 !att_used[a] && !has_external_dst[i])
319 dst_implicit_dep = true;
320 att_used[a] = true;
321 }
322
323 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
324 uint32_t a = subpass->pColorAttachments[j].attachment;
325 if (a == VK_ATTACHMENT_UNUSED)
326 continue;
327 if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
328 !att_used[a] && !has_external_dst[i])
329 dst_implicit_dep = true;
330 att_used[a] = true;
331 }
332
333 if (subpass->pDepthStencilAttachment &&
334 subpass->pDepthStencilAttachment->attachment != VK_ATTACHMENT_UNUSED) {
335 uint32_t a = subpass->pDepthStencilAttachment->attachment;
336 uint32_t stencil_layout = vk_att_ref_stencil_layout(subpass->pDepthStencilAttachment, att);
337 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
338
339 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
340 stencil_final_layout != stencil_layout) &&
341 !att_used[a] && !has_external_dst[i]) {
342 dst_implicit_dep = true;
343 }
344 att_used[a] = true;
345 }
346
347 if (subpass->pResolveAttachments) {
348 for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
349 uint32_t a = subpass->pResolveAttachments[j].attachment;
350 if (a == VK_ATTACHMENT_UNUSED)
351 continue;
352 if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
353 !att_used[a] && !has_external_dst[i])
354 dst_implicit_dep = true;
355 att_used[a] = true;
356 }
357 }
358
359 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
360 vk_find_struct_const(subpass->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
361
362 if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment &&
363 ds_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
364 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
365 uint32_t stencil_layout = vk_att_ref_stencil_layout(ds_resolve->pDepthStencilResolveAttachment, att);
366 uint32_t stencil_final_layout = vk_att_desc_stencil_layout(&att[a], true);
367
368 if ((att[a].finalLayout != subpass->pDepthStencilAttachment->layout ||
369 stencil_final_layout != stencil_layout) &&
370 !att_used[a] && !has_external_src[i])
371 dst_implicit_dep = true;
372 att_used[a] = true;
373 }
374
375 if (dst_implicit_dep) {
376 tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2) {
377 .srcSubpass = i,
378 .dstSubpass = VK_SUBPASS_EXTERNAL,
379 .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
380 .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
381 .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
382 VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
383 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
384 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
385 VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
386 .dstAccessMask = 0,
387 .dependencyFlags = 0,
388 });
389 }
390 }
391
392 /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
393 * Assume that if an attachment has an initial layout of UNDEFINED, it gets
394 * transitioned eventually.
395 */
396 for (unsigned i = 0; i < info->attachmentCount; i++) {
397 if (layout_undefined(att[i].initialLayout)) {
398 if (vk_format_is_depth_or_stencil(att[i].format)) {
399 pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
400 } else {
401 pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
402 }
403 }
404 }
405 }
406
407 /* If an input attachment is used without an intervening write to the same
408 * attachment, then we can just use the original image, even in GMEM mode.
409 * This is an optimization, but it's also important because it allows us to
410 * avoid having to invalidate UCHE at the beginning of each tile due to it
411 * becoming invalid. The only reads of GMEM via UCHE should be after an
412 * earlier subpass modified it, which only works if there's already an
413 * appropriate dependency that will add the CACHE_INVALIDATE anyway. We
414 * don't consider this in the dependency code, so this is also required for
415 * correctness.
416 */
417 static void
tu_render_pass_patch_input_gmem(struct tu_render_pass * pass)418 tu_render_pass_patch_input_gmem(struct tu_render_pass *pass)
419 {
420 bool written[pass->attachment_count];
421
422 memset(written, 0, sizeof(written));
423
424 for (unsigned i = 0; i < pass->subpass_count; i++) {
425 struct tu_subpass *subpass = &pass->subpasses[i];
426
427 for (unsigned j = 0; j < subpass->input_count; j++) {
428 uint32_t a = subpass->input_attachments[j].attachment;
429 if (a == VK_ATTACHMENT_UNUSED)
430 continue;
431 subpass->input_attachments[j].patch_input_gmem = written[a];
432 }
433
434 for (unsigned j = 0; j < subpass->color_count; j++) {
435 uint32_t a = subpass->color_attachments[j].attachment;
436 if (a == VK_ATTACHMENT_UNUSED)
437 continue;
438 written[a] = true;
439
440 for (unsigned k = 0; k < subpass->input_count; k++) {
441 if (subpass->input_attachments[k].attachment == a &&
442 !subpass->input_attachments[k].patch_input_gmem) {
443 /* For render feedback loops, we have no idea whether the use
444 * as a color attachment or input attachment will come first,
445 * so we have to always use GMEM in case the color attachment
446 * comes first and defensively invalidate UCHE in case the
447 * input attachment comes first.
448 */
449 subpass->feedback_invalidate = true;
450 subpass->input_attachments[k].patch_input_gmem = true;
451 }
452 }
453 }
454
455 for (unsigned j = 0; j < subpass->resolve_count; j++) {
456 uint32_t a = subpass->resolve_attachments[j].attachment;
457 if (a == VK_ATTACHMENT_UNUSED)
458 continue;
459 written[a] = true;
460 }
461
462 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
463 written[subpass->depth_stencil_attachment.attachment] = true;
464 for (unsigned k = 0; k < subpass->input_count; k++) {
465 if (subpass->input_attachments[k].attachment ==
466 subpass->depth_stencil_attachment.attachment &&
467 !subpass->input_attachments[k].patch_input_gmem) {
468 subpass->feedback_invalidate = true;
469 subpass->input_attachments[k].patch_input_gmem = true;
470 }
471 }
472 }
473 }
474 }
475
476 static void
tu_render_pass_check_feedback_loop(struct tu_render_pass * pass)477 tu_render_pass_check_feedback_loop(struct tu_render_pass *pass)
478 {
479 for (unsigned i = 0; i < pass->subpass_count; i++) {
480 struct tu_subpass *subpass = &pass->subpasses[i];
481
482 for (unsigned j = 0; j < subpass->color_count; j++) {
483 uint32_t a = subpass->color_attachments[j].attachment;
484 if (a == VK_ATTACHMENT_UNUSED)
485 continue;
486 for (unsigned k = 0; k < subpass->input_count; k++) {
487 if (subpass->input_attachments[k].attachment == a) {
488 subpass->feedback_loop_color = true;
489 break;
490 }
491 }
492 }
493
494 if (subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) {
495 for (unsigned k = 0; k < subpass->input_count; k++) {
496 if (subpass->input_attachments[k].attachment ==
497 subpass->depth_stencil_attachment.attachment) {
498 subpass->feedback_loop_ds = true;
499 break;
500 }
501 }
502 }
503 }
504 }
505
/* Record the sample count of an attachment used by the subpass. All
 * attachments used by a given subpass must agree on the sample count
 * (enforced by the assert); 0 means "not yet set".
 */
static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}
512
513 static void
tu_render_pass_cond_config(struct tu_render_pass * pass)514 tu_render_pass_cond_config(struct tu_render_pass *pass)
515 {
516 for (uint32_t i = 0; i < pass->attachment_count; i++) {
517 struct tu_render_pass_attachment *att = &pass->attachments[i];
518
519 att->cond_load_allowed =
520 (att->load || att->load_stencil) && !att->clear_mask && !att->will_be_resolved;
521 att->cond_store_allowed =
522 (att->store || att->store_stencil) && !att->clear_mask;
523 }
524 }
525
/* Compute the per-layout gmem configuration for the render pass: how many
 * pixels fit in gmem given the attachments' bytes-per-pixel, and each
 * attachment's offset within gmem. A gmem_pixels[] value of 0 means gmem
 * rendering is impossible for that layout.
 */
static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   for (enum tu_gmem_layout layout = 0; layout < TU_GMEM_LAYOUT_COUNT;
        layout++) {
      /* From the VK_KHR_multiview spec:
       *
       *    Multiview is all-or-nothing for a render pass - that is, either all
       *    subpasses must have a non-zero view mask (though some subpasses may
       *    have only one view) or all must be zero.
       *
       * This means we only have to check one of the view masks.
       */
      if (pass->subpasses[0].multiview_mask) {
         /* It seems multiview must use sysmem rendering. */
         pass->gmem_pixels[layout] = 0;
         continue;
      }

      /* log2(gmem_align/(tile_align_w*tile_align_h)) */
      uint32_t block_align_shift = 3;
      uint32_t tile_align_w = phys_dev->info->tile_align_w;
      uint32_t gmem_align = (1 << block_align_shift) * tile_align_w *
                            phys_dev->info->tile_align_h;

      /* calculate total bytes per pixel */
      uint32_t cpp_total = 0;
      for (uint32_t i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         bool cpp1 = (att->cpp == 1);
         if (att->gmem) {
            cpp_total += att->cpp;

            /* take into account the separate stencil: */
            if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
               /* att->samples doubles as the stencil image's cpp (see
                * tu_CreateRenderPass2).
                */
               cpp1 = (att->samples == 1);
               cpp_total += att->samples;
            }

            /* texture pitch must be aligned to 64, use a tile_align_w that is
             * a multiple of 64 for cpp==1 attachment to work as input
             * attachment
             */
            if (cpp1 && tile_align_w % 64 != 0) {
               tile_align_w *= 2;
               block_align_shift -= 1;
            }
         }
      }

      pass->tile_align_w = tile_align_w;

      /* no gmem attachments */
      if (cpp_total == 0) {
         /* any value non-zero value so tiling config works with no
          * attachments
          */
         pass->gmem_pixels[layout] = 1024 * 1024;
         continue;
      }

      /* TODO: this algorithm isn't optimal
       * for example, two attachments with cpp = {1, 4}
       * result:  nblocks = {12, 52}, pixels = 196608
       * optimal: nblocks = {13, 51}, pixels = 208896
       */
      uint32_t gmem_size = layout == TU_GMEM_LAYOUT_FULL
                              ? phys_dev->gmem_size
                              : phys_dev->ccu_offset_gmem;
      uint32_t gmem_blocks = gmem_size / gmem_align;
      uint32_t offset = 0, pixels = ~0u, i;
      /* Greedily hand out gmem blocks to attachments proportionally to
       * their cpp; the smallest per-attachment pixel capacity bounds the
       * whole pass.
       */
      for (i = 0; i < pass->attachment_count; i++) {
         struct tu_render_pass_attachment *att = &pass->attachments[i];
         if (!att->gmem)
            continue;

         att->gmem_offset[layout] = offset;

         uint32_t align = MAX2(1, att->cpp >> block_align_shift);
         uint32_t nblocks =
            MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->cpp;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

         /* repeat the same for separate stencil */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            att->gmem_offset_stencil[layout] = offset;

            /* note: for s8_uint, block align is always 1 */
            uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
            if (nblocks > gmem_blocks)
               break;

            gmem_blocks -= nblocks;
            cpp_total -= att->samples;
            offset += nblocks * gmem_align;
            pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
         }
      }

      /* if the loop didn't complete then the gmem config is impossible */
      if (i == pass->attachment_count)
         pass->gmem_pixels[layout] = pixels;
   }
}
638
639 static void
tu_render_pass_bandwidth_config(struct tu_render_pass * pass)640 tu_render_pass_bandwidth_config(struct tu_render_pass *pass)
641 {
642 for (uint32_t i = 0; i < pass->attachment_count; i++) {
643 const struct tu_render_pass_attachment *att = &pass->attachments[i];
644
645 /* approximate tu_load_gmem_attachment */
646 if (att->load)
647 pass->gmem_bandwidth_per_pixel += att->cpp;
648
649 /* approximate tu_store_gmem_attachment */
650 if (att->store)
651 pass->gmem_bandwidth_per_pixel += att->cpp;
652
653 /* approximate tu_clear_sysmem_attachment */
654 if (att->clear_mask)
655 pass->sysmem_bandwidth_per_pixel += att->cpp;
656
657 /* approximate tu6_emit_sysmem_resolves */
658 if (att->will_be_resolved) {
659 pass->sysmem_bandwidth_per_pixel +=
660 att->cpp + att->cpp / att->samples;
661 }
662 }
663 }
664
665 static void
attachment_set_ops(struct tu_device * device,struct tu_render_pass_attachment * att,VkAttachmentLoadOp load_op,VkAttachmentLoadOp stencil_load_op,VkAttachmentStoreOp store_op,VkAttachmentStoreOp stencil_store_op)666 attachment_set_ops(struct tu_device *device,
667 struct tu_render_pass_attachment *att,
668 VkAttachmentLoadOp load_op,
669 VkAttachmentLoadOp stencil_load_op,
670 VkAttachmentStoreOp store_op,
671 VkAttachmentStoreOp stencil_store_op)
672 {
673 if (device->instance->debug_flags & TU_DEBUG_DONT_CARE_AS_LOAD) {
674 if (load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
675 load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
676 if (stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
677 stencil_load_op = VK_ATTACHMENT_LOAD_OP_LOAD;
678 }
679
680 /* load/store ops */
681 att->clear_mask =
682 (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
683 att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
684 att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);
685
686 bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
687 bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
688 bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);
689
690 switch (att->format) {
691 case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
692 if (att->clear_mask)
693 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
694 if (stencil_clear)
695 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
696 if (stencil_load)
697 att->load = true;
698 if (stencil_store)
699 att->store = true;
700 break;
701 case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
702 att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
703 att->load = stencil_load;
704 att->store = stencil_store;
705 break;
706 case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
707 if (att->clear_mask)
708 att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
709 if (stencil_clear)
710 att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
711 if (stencil_load)
712 att->load_stencil = true;
713 if (stencil_store)
714 att->store_stencil = true;
715 break;
716 default:
717 break;
718 }
719 }
720
721 static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve * depth_stencil_resolve)722 is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
723 {
724 if (depth_stencil_resolve &&
725 depth_stencil_resolve->pDepthStencilResolveAttachment &&
726 depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
727 return true;
728 }
729 return false;
730 }
731
732 static void
tu_subpass_use_attachment(struct tu_render_pass * pass,int i,uint32_t a,const VkRenderPassCreateInfo2 * pCreateInfo)733 tu_subpass_use_attachment(struct tu_render_pass *pass, int i, uint32_t a, const VkRenderPassCreateInfo2 *pCreateInfo)
734 {
735 struct tu_subpass *subpass = &pass->subpasses[i];
736
737 pass->attachments[a].gmem = true;
738 update_samples(subpass, pCreateInfo->pAttachments[a].samples);
739 pass->attachments[a].clear_views |= subpass->multiview_mask;
740 }
741
742 VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,const VkRenderPassCreateInfo2 * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)743 tu_CreateRenderPass2(VkDevice _device,
744 const VkRenderPassCreateInfo2 *pCreateInfo,
745 const VkAllocationCallbacks *pAllocator,
746 VkRenderPass *pRenderPass)
747 {
748 TU_FROM_HANDLE(tu_device, device, _device);
749
750 if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC))
751 return vk_common_CreateRenderPass2(_device, pCreateInfo, pAllocator,
752 pRenderPass);
753
754 struct tu_render_pass *pass;
755 size_t size;
756 size_t attachments_offset;
757
758 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
759
760 size = sizeof(*pass);
761 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
762 attachments_offset = size;
763 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
764
765 pass = vk_object_zalloc(&device->vk, pAllocator, size,
766 VK_OBJECT_TYPE_RENDER_PASS);
767 if (pass == NULL)
768 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
769
770 pass->attachment_count = pCreateInfo->attachmentCount;
771 pass->subpass_count = pCreateInfo->subpassCount;
772 pass->attachments = (void *) pass + attachments_offset;
773
774 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
775 struct tu_render_pass_attachment *att = &pass->attachments[i];
776
777 att->format = pCreateInfo->pAttachments[i].format;
778 att->samples = pCreateInfo->pAttachments[i].samples;
779 /* for d32s8, cpp is for the depth image, and
780 * att->samples will be used as the cpp for the stencil image
781 */
782 if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
783 att->cpp = 4 * att->samples;
784 else
785 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
786 /* Initially not allocated into gmem, tu_subpass_use_attachment() will move it there. */
787 att->gmem = false;
788
789 VkAttachmentLoadOp loadOp = pCreateInfo->pAttachments[i].loadOp;
790 VkAttachmentLoadOp stencilLoadOp = pCreateInfo->pAttachments[i].stencilLoadOp;
791
792 attachment_set_ops(device, att, loadOp, stencilLoadOp,
793 pCreateInfo->pAttachments[i].storeOp,
794 pCreateInfo->pAttachments[i].stencilStoreOp);
795 }
796 uint32_t subpass_attachment_count = 0;
797 struct tu_subpass_attachment *p;
798 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
799 const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
800 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
801 vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
802
803 subpass_attachment_count +=
804 desc->inputAttachmentCount + desc->colorAttachmentCount +
805 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
806 (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
807 }
808
809 if (subpass_attachment_count) {
810 pass->subpass_attachments = vk_alloc2(
811 &device->vk.alloc, pAllocator,
812 subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
813 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
814 if (pass->subpass_attachments == NULL) {
815 vk_object_free(&device->vk, pAllocator, pass);
816 return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
817 }
818 } else
819 pass->subpass_attachments = NULL;
820
821 p = pass->subpass_attachments;
822 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
823 const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
824 const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
825 vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
826 struct tu_subpass *subpass = &pass->subpasses[i];
827
828 subpass->input_count = desc->inputAttachmentCount;
829 subpass->color_count = desc->colorAttachmentCount;
830 subpass->resolve_count = 0;
831 subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
832 subpass->samples = 0;
833 subpass->srgb_cntl = 0;
834
835 const VkSubpassDescriptionFlagBits raster_order_access_bits =
836 VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_COLOR_ACCESS_BIT_ARM |
837 VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_DEPTH_ACCESS_BIT_ARM |
838 VK_SUBPASS_DESCRIPTION_RASTERIZATION_ORDER_ATTACHMENT_STENCIL_ACCESS_BIT_ARM;
839
840 subpass->raster_order_attachment_access = desc->flags & raster_order_access_bits;
841
842 subpass->multiview_mask = desc->viewMask;
843
844 if (desc->inputAttachmentCount > 0) {
845 subpass->input_attachments = p;
846 p += desc->inputAttachmentCount;
847
848 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
849 uint32_t a = desc->pInputAttachments[j].attachment;
850 subpass->input_attachments[j].attachment = a;
851 /* Note: attachments only used as input attachments will be read
852 * directly instead of through gmem, so we don't mark input
853 * attachments as needing gmem.
854 */
855 }
856 }
857
858 if (desc->colorAttachmentCount > 0) {
859 subpass->color_attachments = p;
860 p += desc->colorAttachmentCount;
861
862 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
863 uint32_t a = desc->pColorAttachments[j].attachment;
864 subpass->color_attachments[j].attachment = a;
865
866 if (a != VK_ATTACHMENT_UNUSED) {
867 tu_subpass_use_attachment(pass, i, a, pCreateInfo);
868
869 if (vk_format_is_srgb(pass->attachments[a].format))
870 subpass->srgb_cntl |= 1 << j;
871 }
872 }
873 }
874
875 subpass->resolve_attachments = (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
876 if (desc->pResolveAttachments) {
877 p += desc->colorAttachmentCount;
878 subpass->resolve_count += desc->colorAttachmentCount;
879 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
880 subpass->resolve_attachments[j].attachment =
881 desc->pResolveAttachments[j].attachment;
882
883 uint32_t src_a = desc->pColorAttachments[j].attachment;
884 if (src_a != VK_ATTACHMENT_UNUSED) {
885 pass->attachments[src_a].will_be_resolved =
886 desc->pResolveAttachments[j].attachment != VK_ATTACHMENT_UNUSED;
887 }
888 }
889 }
890
891 if (subpass->resolve_depth_stencil) {
892 p++;
893 subpass->resolve_count++;
894 uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
895 subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
896
897 uint32_t src_a = desc->pDepthStencilAttachment->attachment;
898 if (src_a != VK_ATTACHMENT_UNUSED) {
899 pass->attachments[src_a].will_be_resolved = a != VK_ATTACHMENT_UNUSED;
900 }
901 }
902
903 uint32_t a = desc->pDepthStencilAttachment ?
904 desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
905 subpass->depth_stencil_attachment.attachment = a;
906 if (a != VK_ATTACHMENT_UNUSED)
907 tu_subpass_use_attachment(pass, i, a, pCreateInfo);
908 }
909
910 tu_render_pass_patch_input_gmem(pass);
911
912 tu_render_pass_check_feedback_loop(pass);
913
914 /* disable unused attachments */
915 for (uint32_t i = 0; i < pass->attachment_count; i++) {
916 struct tu_render_pass_attachment *att = &pass->attachments[i];
917 if (!att->gmem) {
918 att->clear_mask = 0;
919 att->load = false;
920 }
921 }
922
923 tu_render_pass_cond_config(pass);
924 tu_render_pass_gmem_config(pass, device->physical_device);
925 tu_render_pass_bandwidth_config(pass);
926
927 for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
928 tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
929 }
930
931 tu_render_pass_add_implicit_deps(pass, pCreateInfo);
932
933 *pRenderPass = tu_render_pass_to_handle(pass);
934
935 return VK_SUCCESS;
936 }
937
938 VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)939 tu_DestroyRenderPass(VkDevice _device,
940 VkRenderPass _pass,
941 const VkAllocationCallbacks *pAllocator)
942 {
943 TU_FROM_HANDLE(tu_device, device, _device);
944
945 if (unlikely(device->instance->debug_flags & TU_DEBUG_DYNAMIC)) {
946 vk_common_DestroyRenderPass(_device, _pass, pAllocator);
947 return;
948 }
949
950 TU_FROM_HANDLE(tu_render_pass, pass, _pass);
951
952 if (!_pass)
953 return;
954
955 vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
956 vk_object_free(&device->vk, pAllocator, pass);
957 }
958
959 static void
tu_setup_dynamic_attachment(struct tu_render_pass_attachment * att,struct tu_image_view * view)960 tu_setup_dynamic_attachment(struct tu_render_pass_attachment *att,
961 struct tu_image_view *view)
962 {
963 att->format = view->vk.format;
964 att->samples = view->image->layout->nr_samples;
965
966 /* for d32s8, cpp is for the depth image, and
967 * att->samples will be used as the cpp for the stencil image
968 */
969 if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
970 att->cpp = 4 * att->samples;
971 else
972 att->cpp = vk_format_get_blocksize(att->format) * att->samples;
973 }
974
/* Build the command buffer's internal single-subpass render pass from a
 * vkCmdBeginRendering-style VkRenderingInfo (dynamic rendering).  Attachment
 * storage lives in the command buffer (dynamic_rp_attachments etc.), and the
 * pass is finalized with the same cond/gmem/bandwidth configuration steps as
 * a regular render pass.
 */
void
tu_setup_dynamic_render_pass(struct tu_cmd_buffer *cmd_buffer,
                             const VkRenderingInfo *info)
{
   struct tu_device *device = cmd_buffer->device;
   struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
   struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;

   pass->subpass_count = 1;
   pass->attachments = cmd_buffer->dynamic_rp_attachments;

   /* One resolve slot is reserved per color attachment; slots without a
    * resolve are marked VK_ATTACHMENT_UNUSED in the loop below.  A possible
    * depth/stencil resolve bumps resolve_count later.
    */
   subpass->color_count = subpass->resolve_count = info->colorAttachmentCount;
   subpass->resolve_depth_stencil = false;
   subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
   subpass->resolve_attachments = cmd_buffer->dynamic_resolve_attachments;
   subpass->feedback_invalidate = false;
   subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
   subpass->input_count = 0;
   subpass->samples = 0;
   subpass->srgb_cntl = 0;
   subpass->raster_order_attachment_access = false;
   subpass->multiview_mask = info->viewMask;

   /* 'a' indexes pass->attachments and counts only attachments that are
    * actually bound (NULL image views consume no slot).
    */
   uint32_t a = 0;
   for (uint32_t i = 0; i < info->colorAttachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[a];
      const VkRenderingAttachmentInfo *att_info = &info->pColorAttachments[i];

      if (att_info->imageView == VK_NULL_HANDLE) {
         subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         continue;
      }

      TU_FROM_HANDLE(tu_image_view, view, att_info->imageView);
      tu_setup_dynamic_attachment(att, view);
      att->gmem = true;
      att->clear_views = info->viewMask;
      /* Color attachments have no stencil aspect, so the stencil load/store
       * ops are passed as 0.
       */
      attachment_set_ops(device, att, att_info->loadOp, 0,
                         att_info->storeOp, 0);
      subpass->color_attachments[i].attachment = a++;

      subpass->samples = view->image->layout->nr_samples;

      if (vk_format_is_srgb(view->vk.format))
         subpass->srgb_cntl |= 1 << i;

      if (att_info->resolveMode != VK_RESOLVE_MODE_NONE) {
         /* The resolve destination is written directly to sysmem, so it is
          * never allocated in gmem and never loaded (DONT_CARE).
          */
         struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
         TU_FROM_HANDLE(tu_image_view, resolve_view, att_info->resolveImageView);
         tu_setup_dynamic_attachment(resolve_att, resolve_view);
         resolve_att->gmem = false;
         attachment_set_ops(device, resolve_att,
                            VK_ATTACHMENT_LOAD_OP_DONT_CARE, 0,
                            VK_ATTACHMENT_STORE_OP_STORE, 0);
         subpass->resolve_attachments[i].attachment = a++;
         att->will_be_resolved = true;
      } else {
         subpass->resolve_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
         att->will_be_resolved = false;
      }
   }

   if (info->pDepthAttachment || info->pStencilAttachment) {
      /* Depth and stencil share a single attachment slot.  Prefer the depth
       * info when it has a bound view; per the dynamic-rendering rules both
       * views refer to the same image, so either carries the image/view
       * state.  (NOTE(review): ops below are still taken per-aspect.)
       */
      const struct VkRenderingAttachmentInfo *common_info =
         (info->pDepthAttachment &&
          info->pDepthAttachment->imageView != VK_NULL_HANDLE) ?
         info->pDepthAttachment :
         info->pStencilAttachment;

      if (common_info && common_info->imageView != VK_NULL_HANDLE) {
         TU_FROM_HANDLE(tu_image_view, view, common_info->imageView);

         struct tu_render_pass_attachment *att = &pass->attachments[a];
         tu_setup_dynamic_attachment(att, view);
         att->gmem = true;
         att->clear_views = info->viewMask;
         subpass->depth_stencil_attachment.attachment = a++;

         /* Load/store ops come from the per-aspect structs; a missing aspect
          * gets 0 (== VK_ATTACHMENT_LOAD_OP_LOAD / STORE_OP_STORE enum 0).
          */
         attachment_set_ops(device, att,
                            info->pDepthAttachment ? info->pDepthAttachment->loadOp : 0,
                            info->pStencilAttachment ? info->pStencilAttachment->loadOp : 0,
                            info->pDepthAttachment ? info->pDepthAttachment->storeOp : 0,
                            info->pStencilAttachment ? info->pStencilAttachment->storeOp : 0);

         subpass->samples = view->image->layout->nr_samples;

         if (common_info->resolveMode != VK_RESOLVE_MODE_NONE) {
            /* Depth/stencil resolve occupies one extra slot appended after
             * the color resolve slots.
             */
            unsigned i = subpass->resolve_count++;
            struct tu_render_pass_attachment *resolve_att = &pass->attachments[a];
            TU_FROM_HANDLE(tu_image_view, resolve_view,
                           common_info->resolveImageView);
            tu_setup_dynamic_attachment(resolve_att, resolve_view);
            resolve_att->gmem = false;
            attachment_set_ops(device, resolve_att,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_LOAD_OP_DONT_CARE,
                               VK_ATTACHMENT_STORE_OP_STORE,
                               VK_ATTACHMENT_STORE_OP_STORE);
            subpass->resolve_attachments[i].attachment = a++;
            att->will_be_resolved = true;
            subpass->resolve_depth_stencil = true;
         } else {
            att->will_be_resolved = false;
         }
      } else {
         subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
      }
   } else {
      subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
   }

   pass->attachment_count = a;

   /* Same finalization as tu_CreateRenderPass: conditional load/store
    * config, gmem layout, and bandwidth accounting.
    */
   tu_render_pass_cond_config(pass);
   tu_render_pass_gmem_config(pass, device->physical_device);
   tu_render_pass_bandwidth_config(pass);
}
1093
1094 void
tu_setup_dynamic_inheritance(struct tu_cmd_buffer * cmd_buffer,const VkCommandBufferInheritanceRenderingInfo * info)1095 tu_setup_dynamic_inheritance(struct tu_cmd_buffer *cmd_buffer,
1096 const VkCommandBufferInheritanceRenderingInfo *info)
1097 {
1098 struct tu_render_pass *pass = &cmd_buffer->dynamic_pass;
1099 struct tu_subpass *subpass = &cmd_buffer->dynamic_subpass;
1100
1101 pass->subpass_count = 1;
1102 pass->attachments = cmd_buffer->dynamic_rp_attachments;
1103
1104 subpass->color_count = info->colorAttachmentCount;
1105 subpass->resolve_count = 0;
1106 subpass->resolve_depth_stencil = false;
1107 subpass->color_attachments = cmd_buffer->dynamic_color_attachments;
1108 subpass->resolve_attachments = NULL;
1109 subpass->feedback_invalidate = false;
1110 subpass->feedback_loop_ds = subpass->feedback_loop_color = false;
1111 subpass->input_count = 0;
1112 subpass->samples = 0;
1113 subpass->srgb_cntl = 0;
1114 subpass->raster_order_attachment_access = false;
1115 subpass->multiview_mask = info->viewMask;
1116 subpass->samples = info->rasterizationSamples;
1117
1118 unsigned a = 0;
1119 for (unsigned i = 0; i < info->colorAttachmentCount; i++) {
1120 struct tu_render_pass_attachment *att = &pass->attachments[a];
1121 VkFormat format = info->pColorAttachmentFormats[i];
1122
1123 if (format == VK_FORMAT_UNDEFINED) {
1124 subpass->color_attachments[i].attachment = VK_ATTACHMENT_UNUSED;
1125 continue;
1126 }
1127
1128 att->format = format;
1129 att->samples = info->rasterizationSamples;
1130 subpass->samples = info->rasterizationSamples;
1131 subpass->color_attachments[i].attachment = a++;
1132
1133 /* conservatively assume that the attachment may be conditionally
1134 * loaded/stored.
1135 */
1136 att->cond_load_allowed = att->cond_store_allowed = true;
1137 }
1138
1139 if (info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
1140 info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
1141 struct tu_render_pass_attachment *att = &pass->attachments[a];
1142 att->format = info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ?
1143 info->depthAttachmentFormat : info->stencilAttachmentFormat;
1144 att->samples = info->rasterizationSamples;
1145 subpass->depth_stencil_attachment.attachment = a++;
1146 att->cond_load_allowed = att->cond_store_allowed = true;
1147 } else {
1148 subpass->depth_stencil_attachment.attachment = VK_ATTACHMENT_UNUSED;
1149 }
1150 }
1151
1152 VKAPI_ATTR void VKAPI_CALL
tu_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)1153 tu_GetRenderAreaGranularity(VkDevice _device,
1154 VkRenderPass renderPass,
1155 VkExtent2D *pGranularity)
1156 {
1157 TU_FROM_HANDLE(tu_device, device, _device);
1158 pGranularity->width = device->physical_device->info->gmem_align_w;
1159 pGranularity->height = device->physical_device->info->gmem_align_h;
1160 }
1161
1162 uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass * subpass,uint32_t index)1163 tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
1164 {
1165 if (subpass->resolve_depth_stencil &&
1166 index == (subpass->resolve_count - 1))
1167 return subpass->depth_stencil_attachment.attachment;
1168
1169 return subpass->color_attachments[index].attachment;
1170 }
1171