• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "vk_format_info.h"
26 
27 static uint32_t
num_subpass_attachments(const VkSubpassDescription * desc)28 num_subpass_attachments(const VkSubpassDescription *desc)
29 {
30    return desc->inputAttachmentCount +
31           desc->colorAttachmentCount +
32           (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
33           (desc->pDepthStencilAttachment != NULL);
34 }
35 
36 static void
set_use_tlb_resolve(struct v3dv_render_pass_attachment * att)37 set_use_tlb_resolve(struct v3dv_render_pass_attachment *att)
38 {
39    const struct v3dv_format *format = v3dv_get_format(att->desc.format);
40    att->use_tlb_resolve = v3dv_format_supports_tlb_resolve(format);
41 }
42 
43 static void
pass_find_subpass_range_for_attachments(struct v3dv_render_pass * pass)44 pass_find_subpass_range_for_attachments(struct v3dv_render_pass *pass)
45 {
46    for (uint32_t i = 0; i < pass->attachment_count; i++) {
47       pass->attachments[i].first_subpass = pass->subpass_count - 1;
48       pass->attachments[i].last_subpass = 0;
49    }
50 
51    for (uint32_t i = 0; i < pass->subpass_count; i++) {
52       const struct v3dv_subpass *subpass = &pass->subpasses[i];
53 
54       for (uint32_t j = 0; j < subpass->color_count; j++) {
55          uint32_t attachment_idx = subpass->color_attachments[j].attachment;
56          if (attachment_idx == VK_ATTACHMENT_UNUSED)
57             continue;
58 
59          if (i < pass->attachments[attachment_idx].first_subpass)
60             pass->attachments[attachment_idx].first_subpass = i;
61          if (i > pass->attachments[attachment_idx].last_subpass)
62             pass->attachments[attachment_idx].last_subpass = i;
63 
64          if (subpass->resolve_attachments &&
65              subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
66             set_use_tlb_resolve(&pass->attachments[attachment_idx]);
67          }
68       }
69 
70       uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
71       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
72          if (i < pass->attachments[ds_attachment_idx].first_subpass)
73             pass->attachments[ds_attachment_idx].first_subpass = i;
74          if (i > pass->attachments[ds_attachment_idx].last_subpass)
75             pass->attachments[ds_attachment_idx].last_subpass = i;
76       }
77 
78       for (uint32_t j = 0; j < subpass->input_count; j++) {
79          uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
80          if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
81             continue;
82          if (i < pass->attachments[input_attachment_idx].first_subpass)
83             pass->attachments[input_attachment_idx].first_subpass = i;
84          if (i > pass->attachments[input_attachment_idx].last_subpass)
85             pass->attachments[input_attachment_idx].last_subpass = i;
86       }
87 
88       if (subpass->resolve_attachments) {
89          for (uint32_t j = 0; j < subpass->color_count; j++) {
90             uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
91             if (attachment_idx == VK_ATTACHMENT_UNUSED)
92                continue;
93             if (i < pass->attachments[attachment_idx].first_subpass)
94                pass->attachments[attachment_idx].first_subpass = i;
95             if (i > pass->attachments[attachment_idx].last_subpass)
96                pass->attachments[attachment_idx].last_subpass = i;
97          }
98       }
99    }
100 }
101 
102 
103 VkResult
v3dv_CreateRenderPass(VkDevice _device,const VkRenderPassCreateInfo * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)104 v3dv_CreateRenderPass(VkDevice _device,
105                       const VkRenderPassCreateInfo *pCreateInfo,
106                       const VkAllocationCallbacks *pAllocator,
107                       VkRenderPass *pRenderPass)
108 {
109    V3DV_FROM_HANDLE(v3dv_device, device, _device);
110    struct v3dv_render_pass *pass;
111 
112    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
113 
114    size_t size = sizeof(*pass);
115    size_t subpasses_offset = size;
116    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
117    size_t attachments_offset = size;
118    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
119 
120    pass = vk_alloc2(&device->alloc, pAllocator, size, 8,
121                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
122    if (pass == NULL)
123       return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
124 
125    memset(pass, 0, size);
126    pass->attachment_count = pCreateInfo->attachmentCount;
127    pass->attachments = (void *) pass + attachments_offset;
128    pass->subpass_count = pCreateInfo->subpassCount;
129    pass->subpasses = (void *) pass + subpasses_offset;
130 
131    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
132       pass->attachments[i].desc = pCreateInfo->pAttachments[i];
133 
134    uint32_t subpass_attachment_count = 0;
135    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
136       const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
137       subpass_attachment_count += num_subpass_attachments(desc);
138    }
139 
140    if (subpass_attachment_count) {
141       const size_t subpass_attachment_bytes =
142          subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
143       pass->subpass_attachments =
144          vk_alloc2(&device->alloc, pAllocator, subpass_attachment_bytes, 8,
145                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
146       if (pass->subpass_attachments == NULL) {
147          vk_free2(&device->alloc, pAllocator, pass);
148          return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
149       }
150    } else {
151       pass->subpass_attachments = NULL;
152    }
153 
154    struct v3dv_subpass_attachment *p = pass->subpass_attachments;
155    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
156       const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
157       struct v3dv_subpass *subpass = &pass->subpasses[i];
158 
159       subpass->input_count = desc->inputAttachmentCount;
160       subpass->color_count = desc->colorAttachmentCount;
161 
162       if (desc->inputAttachmentCount > 0) {
163          subpass->input_attachments = p;
164          p += desc->inputAttachmentCount;
165 
166          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
167             subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
168                .attachment = desc->pInputAttachments[j].attachment,
169                .layout = desc->pInputAttachments[j].layout,
170             };
171          }
172       }
173 
174       if (desc->colorAttachmentCount > 0) {
175          subpass->color_attachments = p;
176          p += desc->colorAttachmentCount;
177 
178          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
179             const uint32_t attachment_idx =
180                desc->pColorAttachments[j].attachment;
181             subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
182                .attachment = attachment_idx,
183                .layout = desc->pColorAttachments[j].layout,
184             };
185             if (attachment_idx != VK_ATTACHMENT_UNUSED) {
186                VkFormat format = pass->attachments[attachment_idx].desc.format;
187                subpass->has_srgb_rt |= vk_format_is_srgb(format);
188             }
189          }
190       }
191 
192       if (desc->pResolveAttachments) {
193          subpass->resolve_attachments = p;
194          p += desc->colorAttachmentCount;
195 
196          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
197             subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
198                .attachment = desc->pResolveAttachments[j].attachment,
199                .layout = desc->pResolveAttachments[j].layout,
200             };
201          }
202       }
203 
204       if (desc->pDepthStencilAttachment) {
205          subpass->ds_attachment = (struct v3dv_subpass_attachment) {
206             .attachment = desc->pDepthStencilAttachment->attachment,
207             .layout = desc->pDepthStencilAttachment->layout,
208          };
209 
210          /* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),
211           * the clear might get lost. If a subpass has this then we can't emit
212           * the clear using the TLB and we have to do it as a draw call.
213           *
214           * FIXME: separate stencil.
215           */
216          if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
217             struct v3dv_render_pass_attachment *att =
218                &pass->attachments[subpass->ds_attachment.attachment];
219             if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
220                if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
221                    att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
222                   subpass->do_depth_clear_with_draw = true;
223                } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
224                           att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
225                   subpass->do_stencil_clear_with_draw = true;
226                }
227             }
228          }
229       } else {
230          subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
231       }
232    }
233 
234    pass_find_subpass_range_for_attachments(pass);
235 
236    /* FIXME: handle subpass dependencies */
237 
238    *pRenderPass = v3dv_render_pass_to_handle(pass);
239 
240    return VK_SUCCESS;
241 }
242 
243 void
v3dv_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)244 v3dv_DestroyRenderPass(VkDevice _device,
245                        VkRenderPass _pass,
246                        const VkAllocationCallbacks *pAllocator)
247 {
248    V3DV_FROM_HANDLE(v3dv_device, device, _device);
249    V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
250 
251    if (!_pass)
252       return;
253 
254    vk_free2(&device->alloc, pAllocator, pass->subpass_attachments);
255    vk_free2(&device->alloc, pAllocator, pass);
256 }
257 
258 static void
subpass_get_granularity(struct v3dv_render_pass * pass,uint32_t subpass_idx,VkExtent2D * granularity)259 subpass_get_granularity(struct v3dv_render_pass *pass,
260                         uint32_t subpass_idx,
261                         VkExtent2D *granularity)
262 {
263    static const uint8_t tile_sizes[] = {
264       64, 64,
265       64, 32,
266       32, 32,
267       32, 16,
268       16, 16,
269       16,  8,
270        8,  8
271    };
272 
273    /* Our tile size depends on the number of color attachments and the maximum
274     * bpp across them.
275     */
276    assert(subpass_idx < pass->subpass_count);
277    struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
278    const uint32_t color_attachment_count = subpass->color_count;
279 
280    uint32_t max_internal_bpp = 0;
281    for (uint32_t i = 0; i < color_attachment_count; i++) {
282       uint32_t attachment_idx = subpass->color_attachments[i].attachment;
283       if (attachment_idx == VK_ATTACHMENT_UNUSED)
284          continue;
285       const VkAttachmentDescription *desc =
286          &pass->attachments[attachment_idx].desc;
287       const struct v3dv_format *format = v3dv_get_format(desc->format);
288       uint32_t internal_type, internal_bpp;
289       v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
290                                                    &internal_type,
291                                                    &internal_bpp);
292       max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
293    }
294 
295    uint32_t idx = 0;
296    if (color_attachment_count > 2)
297       idx += 2;
298    else if (color_attachment_count > 1)
299       idx += 1;
300 
301    idx += max_internal_bpp;
302 
303    assert(idx < ARRAY_SIZE(tile_sizes));
304    *granularity = (VkExtent2D) {
305       .width = tile_sizes[idx * 2],
306       .height = tile_sizes[idx * 2 + 1]
307    };
308 }
309 
310 void
v3dv_GetRenderAreaGranularity(VkDevice device,VkRenderPass renderPass,VkExtent2D * pGranularity)311 v3dv_GetRenderAreaGranularity(VkDevice device,
312                               VkRenderPass renderPass,
313                               VkExtent2D *pGranularity)
314 {
315    V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
316 
317    *pGranularity = (VkExtent2D) {
318       .width = 64,
319       .height = 64,
320    };
321 
322    for (uint32_t i = 0; i < pass->subpass_count; i++) {
323       VkExtent2D sg;
324       subpass_get_granularity(pass, i, &sg);
325       pGranularity->width = MIN2(pGranularity->width, sg.width);
326       pGranularity->height = MIN2(pGranularity->height, sg.height);
327    }
328 }
329 
330 /* Checks whether the render area rectangle covers a region that is aligned to
331  * tile boundaries. This means that we are writing to all pixels covered by
332  * all tiles in that area (except for pixels on edge tiles that are outside
333  * the framebuffer dimensions).
334  *
335  * When our framebuffer is aligned to tile boundaries we know we are writing
336  * valid data to all all pixels in each tile and we can apply certain
337  * optimizations, like avoiding tile loads, since we know that none of the
338  * original pixel values in each tile for that area need to be preserved.
339  * We also use this to decide if we can use TLB clears, as these clear whole
340  * tiles so we can't use them if the render area is not aligned.
341  *
342  * Note that when an image is created it will possibly include padding blocks
343  * depending on its tiling layout. When the framebuffer dimensions are not
344  * aligned to tile boundaries then edge tiles are only partially covered by the
345  * framebuffer pixels, but tile stores still seem to store full tiles
346  * writing to the padded sections. This is important when the framebuffer
347  * is aliasing a smaller section of a larger image, as in that case the edge
348  * tiles of the framebuffer would overwrite valid pixels in the larger image.
349  * In that case, we can't flag the area as being aligned.
350  */
351 bool
v3dv_subpass_area_is_tile_aligned(const VkRect2D * area,struct v3dv_framebuffer * fb,struct v3dv_render_pass * pass,uint32_t subpass_idx)352 v3dv_subpass_area_is_tile_aligned(const VkRect2D *area,
353                                   struct v3dv_framebuffer *fb,
354                                   struct v3dv_render_pass *pass,
355                                   uint32_t subpass_idx)
356 {
357    assert(subpass_idx < pass->subpass_count);
358 
359    VkExtent2D granularity;
360    subpass_get_granularity(pass, subpass_idx, &granularity);
361 
362    return area->offset.x % granularity.width == 0 &&
363           area->offset.y % granularity.height == 0 &&
364          (area->extent.width % granularity.width == 0 ||
365           (fb->has_edge_padding &&
366            area->offset.x + area->extent.width >= fb->width)) &&
367          (area->extent.height % granularity.height == 0 ||
368           (fb->has_edge_padding &&
369            area->offset.y + area->extent.height >= fb->height));
370 }
371