1 /*
2 * Copyright © 2019 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "vk_format_info.h"
26
27 static uint32_t
num_subpass_attachments(const VkSubpassDescription * desc)28 num_subpass_attachments(const VkSubpassDescription *desc)
29 {
30 return desc->inputAttachmentCount +
31 desc->colorAttachmentCount +
32 (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
33 (desc->pDepthStencilAttachment != NULL);
34 }
35
36 static void
set_use_tlb_resolve(struct v3dv_render_pass_attachment * att)37 set_use_tlb_resolve(struct v3dv_render_pass_attachment *att)
38 {
39 const struct v3dv_format *format = v3dv_get_format(att->desc.format);
40 att->use_tlb_resolve = v3dv_format_supports_tlb_resolve(format);
41 }
42
43 static void
pass_find_subpass_range_for_attachments(struct v3dv_render_pass * pass)44 pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass)
45 {
46 for (uint32_t i = 0; i < pass->attachment_count; i++) {
47 pass->attachments[i].first_subpass = pass->subpass_count - 1;
48 pass->attachments[i].last_subpass = 0;
49 }
50
51 for (uint32_t i = 0; i < pass->subpass_count; i++) {
52 const struct v3dv_subpass *subpass = &pass->subpasses[i];
53
54 for (uint32_t j = 0; j < subpass->color_count; j++) {
55 uint32_t attachment_idx = subpass->color_attachments[j].attachment;
56 if (attachment_idx == VK_ATTACHMENT_UNUSED)
57 continue;
58
59 if (i < pass->attachments[attachment_idx].first_subpass)
60 pass->attachments[attachment_idx].first_subpass = i;
61 if (i > pass->attachments[attachment_idx].last_subpass)
62 pass->attachments[attachment_idx].last_subpass = i;
63
64 if (subpass->resolve_attachments &&
65 subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
66 set_use_tlb_resolve(&pass->attachments[attachment_idx]);
67 }
68 }
69
70 uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
71 if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
72 if (i < pass->attachments[ds_attachment_idx].first_subpass)
73 pass->attachments[ds_attachment_idx].first_subpass = i;
74 if (i > pass->attachments[ds_attachment_idx].last_subpass)
75 pass->attachments[ds_attachment_idx].last_subpass = i;
76 }
77
78 for (uint32_t j = 0; j < subpass->input_count; j++) {
79 uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
80 if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
81 continue;
82 if (i < pass->attachments[input_attachment_idx].first_subpass)
83 pass->attachments[input_attachment_idx].first_subpass = i;
84 if (i > pass->attachments[input_attachment_idx].last_subpass)
85 pass->attachments[input_attachment_idx].last_subpass = i;
86 }
87
88 if (subpass->resolve_attachments) {
89 for (uint32_t j = 0; j < subpass->color_count; j++) {
90 uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
91 if (attachment_idx == VK_ATTACHMENT_UNUSED)
92 continue;
93 if (i < pass->attachments[attachment_idx].first_subpass)
94 pass->attachments[attachment_idx].first_subpass = i;
95 if (i > pass->attachments[attachment_idx].last_subpass)
96 pass->attachments[attachment_idx].last_subpass = i;
97 }
98 }
99 }
100 }
101
102
103 VkResult
v3dv_CreateRenderPass(VkDevice _device,const VkRenderPassCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)104 v3dv_CreateRenderPass(VkDevice _device,
105 const VkRenderPassCreateInfo *pCreateInfo,
106 const VkAllocationCallbacks *pAllocator,
107 VkRenderPass *pRenderPass)
108 {
109 V3DV_FROM_HANDLE(v3dv_device, device, _device);
110 struct v3dv_render_pass *pass;
111
112 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
113
114 size_t size = sizeof(*pass);
115 size_t subpasses_offset = size;
116 size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
117 size_t attachments_offset = size;
118 size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
119
120 pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
121 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
122 if (pass == NULL)
123 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
124
125 memset(pass, 0, size);
126 pass->attachment_count = pCreateInfo->attachmentCount;
127 pass->attachments = (void *) pass + attachments_offset;
128 pass->subpass_count = pCreateInfo->subpassCount;
129 pass->subpasses = (void *) pass + subpasses_offset;
130
131 for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
132 pass->attachments[i].desc = pCreateInfo->pAttachments[i];
133
134 uint32_t subpass_attachment_count = 0;
135 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
136 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
137 subpass_attachment_count += num_subpass_attachments(desc);
138 }
139
140 if (subpass_attachment_count) {
141 const size_t subpass_attachment_bytes =
142 subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
143 pass->subpass_attachments =
144 vk_alloc2(&device->alloc, pAllocator, subpass_attachment_bytes, 8,
145 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
146 if (pass->subpass_attachments == NULL) {
147 vk_free2(&device->alloc, pAllocator, pass);
148 return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
149 }
150 } else {
151 pass->subpass_attachments = NULL;
152 }
153
154 struct v3dv_subpass_attachment *p = pass->subpass_attachments;
155 for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
156 const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
157 struct v3dv_subpass *subpass = &pass->subpasses[i];
158
159 subpass->input_count = desc->inputAttachmentCount;
160 subpass->color_count = desc->colorAttachmentCount;
161
162 if (desc->inputAttachmentCount > 0) {
163 subpass->input_attachments = p;
164 p += desc->inputAttachmentCount;
165
166 for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
167 subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
168 .attachment = desc->pInputAttachments[j].attachment,
169 .layout = desc->pInputAttachments[j].layout,
170 };
171 }
172 }
173
174 if (desc->colorAttachmentCount > 0) {
175 subpass->color_attachments = p;
176 p += desc->colorAttachmentCount;
177
178 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
179 const uint32_t attachment_idx =
180 desc->pColorAttachments[j].attachment;
181 subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
182 .attachment = attachment_idx,
183 .layout = desc->pColorAttachments[j].layout,
184 };
185 if (attachment_idx != VK_ATTACHMENT_UNUSED) {
186 VkFormat format = pass->attachments[attachment_idx].desc.format;
187 subpass->has_srgb_rt |= vk_format_is_srgb(format);
188 }
189 }
190 }
191
192 if (desc->pResolveAttachments) {
193 subpass->resolve_attachments = p;
194 p += desc->colorAttachmentCount;
195
196 for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
197 subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
198 .attachment = desc->pResolveAttachments[j].attachment,
199 .layout = desc->pResolveAttachments[j].layout,
200 };
201 }
202 }
203
204 if (desc->pDepthStencilAttachment) {
205 subpass->ds_attachment = (struct v3dv_subpass_attachment) {
206 .attachment = desc->pDepthStencilAttachment->attachment,
207 .layout = desc->pDepthStencilAttachment->layout,
208 };
209
210 /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
211 * the clear might get lost. If a subpass has this then we can't emit
212 * the clear using the TLB and we have to do it as a draw call.
213 *
214 * FIXME: separate stencil.
215 */
216 if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
217 struct v3dv_render_pass_attachment *att =
218 &pass->attachments[subpass->ds_attachment.attachment];
219 if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
220 if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
221 att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
222 subpass->do_depth_clear_with_draw = true;
223 } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
224 att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
225 subpass->do_stencil_clear_with_draw = true;
226 }
227 }
228 }
229 } else {
230 subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
231 }
232 }
233
234 pass_find_subpass_range_for_attachments(pass);
235
236 /* FIXME: handle subpass dependencies */
237
238 *pRenderPass = v3dv_render_pass_to_handle(pass);
239
240 return VK_SUCCESS;
241 }
242
243 void
v3dv_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)244 v3dv_DestroyRenderPass(VkDevice _device,
245 VkRenderPass _pass,
246 const VkAllocationCallbacks *pAllocator)
247 {
248 V3DV_FROM_HANDLE(v3dv_device, device, _device);
249 V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
250
251 if (!_pass)
252 return;
253
254 vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
255 vk_free2(&device->alloc, pAllocator, pass);
256 }
257
258 static void
subpass_get_granularity(struct v3dv_render_pass * pass,uint32_t subpass_idx,VkExtent2D * granularity)259 subpass_get_granularity(struct v3dv_render_pass *pass,
260 uint32_t subpass_idx,
261 VkExtent2D *granularity)
262 {
263 static const uint8_t tile_sizes[] = {
264 64, 64,
265 64, 32,
266 32, 32,
267 32, 16,
268 16, 16,
269 16, 8,
270 8, 8
271 };
272
273 /* Our tile size depends on the number of color attachments and the maximum
274 * bpp across them.
275 */
276 assert(subpass_idx < pass->subpass_count);
277 struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
278 const uint32_t color_attachment_count = subpass->color_count;
279
280 uint32_t max_internal_bpp = 0;
281 for (uint32_t i = 0; i < color_attachment_count; i++) {
282 uint32_t attachment_idx = subpass->color_attachments[i].attachment;
283 if (attachment_idx == VK_ATTACHMENT_UNUSED)
284 continue;
285 const VkAttachmentDescription *desc =
286 &pass->attachments[attachment_idx].desc;
287 const struct v3dv_format *format = v3dv_get_format(desc->format);
288 uint32_t internal_type, internal_bpp;
289 v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
290 &internal_type,
291 &internal_bpp);
292 max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
293 }
294
295 uint32_t idx = 0;
296 if (color_attachment_count > 2)
297 idx += 2;
298 else if (color_attachment_count > 1)
299 idx += 1;
300
301 idx += max_internal_bpp;
302
303 assert(idx < ARRAY_SIZE(tile_sizes));
304 *granularity = (VkExtent2D) {
305 .width = tile_sizes[idx * 2],
306 .height = tile_sizes[idx * 2 + 1]
307 };
308 }
309
310 void
v3dv_GetRenderAreaGranularity(VkDevice device,VkRenderPass renderPass,VkExtent2D * pGranularity)311 v3dv_GetRenderAreaGranularity(VkDevice device,
312 VkRenderPass renderPass,
313 VkExtent2D *pGranularity)
314 {
315 V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
316
317 *pGranularity = (VkExtent2D) {
318 .width = 64,
319 .height = 64,
320 };
321
322 for (uint32_t i = 0; i < pass->subpass_count; i++) {
323 VkExtent2D sg;
324 subpass_get_granularity(pass, i, &sg);
325 pGranularity->width = MIN2(pGranularity->width, sg.width);
326 pGranularity->height = MIN2(pGranularity->height, sg.height);
327 }
328 }
329
330 /* Checks whether the render area rectangle covers a region that is aligned to
331 * tile boundaries. This means that we are writing to all pixels covered by
332 * all tiles in that area (except for pixels on edge tiles that are outside
333 * the framebuffer dimensions).
334 *
335 * When our framebuffer is aligned to tile boundaries we know we are writing
336 * valid data to all all pixels in each tile and we can apply certain
337 * optimizations, like avoiding tile loads, since we know that none of the
338 * original pixel values in each tile for that area need to be preserved.
339 * We also use this to decide if we can use TLB clears, as these clear whole
340 * tiles so we can't use them if the render area is not aligned.
341 *
342 * Note that when an image is created it will possibly include padding blocks
343 * depending on its tiling layout. When the framebuffer dimensions are not
344 * aligned to tile boundaries then edge tiles are only partially covered by the
345 * framebuffer pixels, but tile stores still seem to store full tiles
346 * writing to the padded sections. This is important when the framebuffer
347 * is aliasing a smaller section of a larger image, as in that case the edge
348 * tiles of the framebuffer would overwrite valid pixels in the larger image.
349 * In that case, we can't flag the area as being aligned.
350 */
351 bool
v3dv_subpass_area_is_tile_aligned(const VkRect2D * area,struct v3dv_framebuffer * fb,struct v3dv_render_pass * pass,uint32_t subpass_idx)352 v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
353 struct v3dv_framebuffer *fb,
354 struct v3dv_render_pass *pass,
355 uint32_t subpass_idx)
356 {
357 assert(subpass_idx < pass->subpass_count);
358
359 VkExtent2D granularity;
360 subpass_get_granularity(pass, subpass_idx, &granularity);
361
362 return area->offset.x % granularity.width == 0 &&
363 area->offset.y % granularity.height == 0 &&
364 (area->extent.width % granularity.width == 0 ||
365 (fb->has_edge_padding &&
366 area->offset.x + area->extent.width >= fb->width)) &&
367 (area->extent.height % granularity.height == 0 ||
368 (fb->has_edge_padding &&
369 area->offset.y + area->extent.height >= fb->height));
370 }
371