• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2019 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 
26 static uint32_t
num_subpass_attachments(const VkSubpassDescription2 * desc)27 num_subpass_attachments(const VkSubpassDescription2 *desc)
28 {
29    return desc->inputAttachmentCount +
30           desc->colorAttachmentCount +
31           (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
32           (desc->pDepthStencilAttachment != NULL);
33 }
34 
35 static void
set_try_tlb_resolve(struct v3dv_device * device,struct v3dv_render_pass_attachment * att)36 set_try_tlb_resolve(struct v3dv_device *device,
37                     struct v3dv_render_pass_attachment *att)
38 {
39    const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);
40    att->try_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);
41 }
42 
43 static void
pass_find_subpass_range_for_attachments(struct v3dv_device * device,struct v3dv_render_pass * pass)44 pass_find_subpass_range_for_attachments(struct v3dv_device *device,
45                                         struct v3dv_render_pass *pass)
46 {
47    for (uint32_t i = 0; i < pass->attachment_count; i++) {
48       pass->attachments[i].first_subpass = pass->subpass_count - 1;
49       pass->attachments[i].last_subpass = 0;
50       if (pass->multiview_enabled) {
51          for (uint32_t j = 0; j < MAX_MULTIVIEW_VIEW_COUNT; j++) {
52             pass->attachments[i].views[j].first_subpass = pass->subpass_count - 1;
53             pass->attachments[i].views[j].last_subpass = 0;
54          }
55       }
56    }
57 
58    for (uint32_t i = 0; i < pass->subpass_count; i++) {
59       const struct v3dv_subpass *subpass = &pass->subpasses[i];
60 
61       for (uint32_t j = 0; j < subpass->color_count; j++) {
62          uint32_t attachment_idx = subpass->color_attachments[j].attachment;
63          if (attachment_idx == VK_ATTACHMENT_UNUSED)
64             continue;
65 
66          struct v3dv_render_pass_attachment *att =
67             &pass->attachments[attachment_idx];
68 
69          if (i < att->first_subpass)
70             att->first_subpass = i;
71          if (i > att->last_subpass)
72             att->last_subpass = i;
73 
74          uint32_t view_mask = subpass->view_mask;
75          while (view_mask) {
76             uint32_t view_index = u_bit_scan(&view_mask);
77             if (i < att->views[view_index].first_subpass)
78                att->views[view_index].first_subpass = i;
79             if (i > att->views[view_index].last_subpass)
80                att->views[view_index].last_subpass = i;
81          }
82 
83          if (subpass->resolve_attachments &&
84              subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {
85             set_try_tlb_resolve(device, att);
86          }
87       }
88 
89       uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;
90       if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {
91          if (i < pass->attachments[ds_attachment_idx].first_subpass)
92             pass->attachments[ds_attachment_idx].first_subpass = i;
93          if (i > pass->attachments[ds_attachment_idx].last_subpass)
94             pass->attachments[ds_attachment_idx].last_subpass = i;
95 
96          if (subpass->ds_resolve_attachment.attachment != VK_ATTACHMENT_UNUSED)
97             set_try_tlb_resolve(device, &pass->attachments[ds_attachment_idx]);
98       }
99 
100       for (uint32_t j = 0; j < subpass->input_count; j++) {
101          uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;
102          if (input_attachment_idx == VK_ATTACHMENT_UNUSED)
103             continue;
104          if (i < pass->attachments[input_attachment_idx].first_subpass)
105             pass->attachments[input_attachment_idx].first_subpass = i;
106          if (i > pass->attachments[input_attachment_idx].last_subpass)
107             pass->attachments[input_attachment_idx].last_subpass = i;
108       }
109 
110       if (subpass->resolve_attachments) {
111          for (uint32_t j = 0; j < subpass->color_count; j++) {
112             uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;
113             if (attachment_idx == VK_ATTACHMENT_UNUSED)
114                continue;
115             if (i < pass->attachments[attachment_idx].first_subpass)
116                pass->attachments[attachment_idx].first_subpass = i;
117             if (i > pass->attachments[attachment_idx].last_subpass)
118                pass->attachments[attachment_idx].last_subpass = i;
119          }
120       }
121    }
122 }
123 
124 
125 VKAPI_ATTR VkResult VKAPI_CALL
v3dv_CreateRenderPass2(VkDevice _device,const VkRenderPassCreateInfo2 * pCreateInfo,const VkAllocationCallbacks * pAllocator,VkRenderPass * pRenderPass)126 v3dv_CreateRenderPass2(VkDevice _device,
127                        const VkRenderPassCreateInfo2 *pCreateInfo,
128                        const VkAllocationCallbacks *pAllocator,
129                        VkRenderPass *pRenderPass)
130 {
131    V3DV_FROM_HANDLE(v3dv_device, device, _device);
132    struct v3dv_render_pass *pass;
133 
134    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2);
135 
136    /* From the VK_KHR_multiview spec:
137     *
138     *   When a subpass uses a non-zero view mask, multiview functionality is
139     *   considered to be enabled. Multiview is all-or-nothing for a render
140     *   pass - that is, either all subpasses must have a non-zero view mask
141     *   (though some subpasses may have only one view) or all must be zero.
142     */
143    bool multiview_enabled = pCreateInfo->subpassCount &&
144       pCreateInfo->pSubpasses[0].viewMask;
145 
146    size_t size = sizeof(*pass);
147    size_t subpasses_offset = size;
148    size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
149    size_t attachments_offset = size;
150    size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);
151 
152    pass = vk_object_zalloc(&device->vk, pAllocator, size,
153                            VK_OBJECT_TYPE_RENDER_PASS);
154    if (pass == NULL)
155       return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
156 
157    pass->multiview_enabled = multiview_enabled;
158    pass->attachment_count = pCreateInfo->attachmentCount;
159    pass->attachments = (void *) pass + attachments_offset;
160    pass->subpass_count = pCreateInfo->subpassCount;
161    pass->subpasses = (void *) pass + subpasses_offset;
162 
163    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)
164       pass->attachments[i].desc = pCreateInfo->pAttachments[i];
165 
166    uint32_t subpass_attachment_count = 0;
167    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
168       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
169       subpass_attachment_count += num_subpass_attachments(desc);
170    }
171 
172    if (subpass_attachment_count) {
173       const size_t subpass_attachment_bytes =
174          subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);
175       pass->subpass_attachments =
176          vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,
177                    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
178       if (pass->subpass_attachments == NULL) {
179          vk_object_free(&device->vk, pAllocator, pass);
180          return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
181       }
182    } else {
183       pass->subpass_attachments = NULL;
184    }
185 
186    struct v3dv_subpass_attachment *p = pass->subpass_attachments;
187    for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
188       const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
189       struct v3dv_subpass *subpass = &pass->subpasses[i];
190 
191       subpass->input_count = desc->inputAttachmentCount;
192       subpass->color_count = desc->colorAttachmentCount;
193       subpass->view_mask = desc->viewMask;
194 
195       if (desc->inputAttachmentCount > 0) {
196          subpass->input_attachments = p;
197          p += desc->inputAttachmentCount;
198 
199          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
200             subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {
201                .attachment = desc->pInputAttachments[j].attachment,
202                .layout = desc->pInputAttachments[j].layout,
203             };
204          }
205       }
206 
207       if (desc->colorAttachmentCount > 0) {
208          subpass->color_attachments = p;
209          p += desc->colorAttachmentCount;
210 
211          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
212             subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {
213                .attachment = desc->pColorAttachments[j].attachment,
214                .layout = desc->pColorAttachments[j].layout,
215             };
216          }
217       }
218 
219       if (desc->pResolveAttachments) {
220          subpass->resolve_attachments = p;
221          p += desc->colorAttachmentCount;
222 
223          for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
224             subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {
225                .attachment = desc->pResolveAttachments[j].attachment,
226                .layout = desc->pResolveAttachments[j].layout,
227             };
228          }
229       }
230 
231       if (desc->pDepthStencilAttachment) {
232          subpass->ds_attachment = (struct v3dv_subpass_attachment) {
233             .attachment = desc->pDepthStencilAttachment->attachment,
234             .layout = desc->pDepthStencilAttachment->layout,
235          };
236 
237          /* GFXH-1461: if depth is cleared but stencil is loaded (or vice versa),
238           * the clear might get lost. If a subpass has this then we can't emit
239           * the clear using the TLB and we have to do it as a draw call. This
240           * issue is fixed since V3D 4.3.18.
241           *
242           * FIXME: separate stencil.
243           */
244          if (device->devinfo.ver == 42 &&
245              subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {
246             struct v3dv_render_pass_attachment *att =
247                &pass->attachments[subpass->ds_attachment.attachment];
248             if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {
249                if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&
250                    att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
251                   subpass->do_depth_clear_with_draw = true;
252                } else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&
253                           att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
254                   subpass->do_stencil_clear_with_draw = true;
255                }
256             }
257          }
258 
259          /* VK_KHR_depth_stencil_resolve */
260          const VkSubpassDescriptionDepthStencilResolve *resolve_desc =
261             vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE);
262          const VkAttachmentReference2 *resolve_att =
263             resolve_desc && resolve_desc->pDepthStencilResolveAttachment &&
264             resolve_desc->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED ?
265                resolve_desc->pDepthStencilResolveAttachment : NULL;
266          if (resolve_att) {
267             subpass->ds_resolve_attachment = (struct v3dv_subpass_attachment) {
268                .attachment = resolve_att->attachment,
269                .layout = resolve_att->layout,
270             };
271             assert(resolve_desc->depthResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT ||
272                    resolve_desc->stencilResolveMode == VK_RESOLVE_MODE_SAMPLE_ZERO_BIT);
273             subpass->resolve_depth =
274                resolve_desc->depthResolveMode != VK_RESOLVE_MODE_NONE &&
275                resolve_att->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
276             subpass->resolve_stencil =
277                resolve_desc->stencilResolveMode != VK_RESOLVE_MODE_NONE &&
278                resolve_att->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;
279          } else {
280             subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
281             subpass->resolve_depth = false;
282             subpass->resolve_stencil = false;
283          }
284       } else {
285          subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;
286          subpass->ds_resolve_attachment.attachment = VK_ATTACHMENT_UNUSED;
287          subpass->resolve_depth = false;
288          subpass->resolve_stencil = false;
289       }
290    }
291 
292    pass_find_subpass_range_for_attachments(device, pass);
293 
294    /* FIXME: handle subpass dependencies */
295 
296    *pRenderPass = v3dv_render_pass_to_handle(pass);
297 
298    return VK_SUCCESS;
299 }
300 
301 VKAPI_ATTR void VKAPI_CALL
v3dv_DestroyRenderPass(VkDevice _device,VkRenderPass _pass,const VkAllocationCallbacks * pAllocator)302 v3dv_DestroyRenderPass(VkDevice _device,
303                        VkRenderPass _pass,
304                        const VkAllocationCallbacks *pAllocator)
305 {
306    V3DV_FROM_HANDLE(v3dv_device, device, _device);
307    V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);
308 
309    if (!_pass)
310       return;
311 
312    vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
313    vk_object_free(&device->vk, pAllocator, pass);
314 }
315 
316 static void
subpass_get_granularity(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,VkExtent2D * granularity)317 subpass_get_granularity(struct v3dv_device *device,
318                         struct v3dv_render_pass *pass,
319                         uint32_t subpass_idx,
320                         VkExtent2D *granularity)
321 {
322    /* Granularity is defined by the tile size */
323    assert(subpass_idx < pass->subpass_count);
324    struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];
325    const uint32_t color_count = subpass->color_count;
326 
327    bool msaa = false;
328    uint32_t max_internal_bpp = 0;
329    uint32_t total_color_bpp = 0;
330    for (uint32_t i = 0; i < color_count; i++) {
331       uint32_t attachment_idx = subpass->color_attachments[i].attachment;
332       if (attachment_idx == VK_ATTACHMENT_UNUSED)
333          continue;
334       const VkAttachmentDescription2 *desc =
335          &pass->attachments[attachment_idx].desc;
336       const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);
337       uint32_t internal_type, internal_bpp;
338       /* We don't support rendering to YCbCr images */
339       assert(format->plane_count == 1);
340       v3dv_X(device, get_internal_type_bpp_for_output_format)
341          (format->planes[0].rt_type, &internal_type, &internal_bpp);
342 
343       max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);
344       total_color_bpp += 4 * v3d_internal_bpp_words(internal_bpp);
345 
346       if (desc->samples > VK_SAMPLE_COUNT_1_BIT)
347          msaa = true;
348    }
349 
350    /* If requested, double-buffer may or may not be enabled depending on
351     * heuristics so we choose a conservative granularity here, with it disabled.
352     */
353    uint32_t width, height;
354    v3d_choose_tile_size(&device->devinfo, color_count,
355                         max_internal_bpp, total_color_bpp, msaa,
356                         false /* double-buffer */, &width, &height);
357    *granularity = (VkExtent2D) {
358       .width = width,
359       .height = height
360    };
361 }
362 
363 VKAPI_ATTR void VKAPI_CALL
v3dv_GetRenderAreaGranularity(VkDevice _device,VkRenderPass renderPass,VkExtent2D * pGranularity)364 v3dv_GetRenderAreaGranularity(VkDevice _device,
365                               VkRenderPass renderPass,
366                               VkExtent2D *pGranularity)
367 {
368    V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);
369    V3DV_FROM_HANDLE(v3dv_device, device, _device);
370 
371    *pGranularity = (VkExtent2D) {
372       .width = 64,
373       .height = 64,
374    };
375 
376    for (uint32_t i = 0; i < pass->subpass_count; i++) {
377       VkExtent2D sg;
378       subpass_get_granularity(device, pass, i, &sg);
379       pGranularity->width = MIN2(pGranularity->width, sg.width);
380       pGranularity->height = MIN2(pGranularity->height, sg.height);
381    }
382 }
383 
384 /* Checks whether the render area rectangle covers a region that is aligned to
385  * tile boundaries. This means that we are writing to all pixels covered by
386  * all tiles in that area (except for pixels on edge tiles that are outside
387  * the framebuffer dimensions).
388  *
389  * When our framebuffer is aligned to tile boundaries we know we are writing
390  * valid data to all all pixels in each tile and we can apply certain
391  * optimizations, like avoiding tile loads, since we know that none of the
392  * original pixel values in each tile for that area need to be preserved.
393  * We also use this to decide if we can use TLB clears, as these clear whole
394  * tiles so we can't use them if the render area is not aligned.
395  *
396  * Note that when an image is created it will possibly include padding blocks
397  * depending on its tiling layout. When the framebuffer dimensions are not
398  * aligned to tile boundaries then edge tiles are only partially covered by the
399  * framebuffer pixels, but tile stores still seem to store full tiles
400  * writing to the padded sections. This is important when the framebuffer
401  * is aliasing a smaller section of a larger image, as in that case the edge
402  * tiles of the framebuffer would overwrite valid pixels in the larger image.
403  * In that case, we can't flag the area as being aligned.
404  */
405 bool
v3dv_subpass_area_is_tile_aligned(struct v3dv_device * device,const VkRect2D * area,struct v3dv_framebuffer * fb,struct v3dv_render_pass * pass,uint32_t subpass_idx)406 v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,
407                                   const VkRect2D *area,
408                                   struct v3dv_framebuffer *fb,
409                                   struct v3dv_render_pass *pass,
410                                   uint32_t subpass_idx)
411 {
412    assert(subpass_idx < pass->subpass_count);
413 
414    VkExtent2D granularity;
415    subpass_get_granularity(device, pass, subpass_idx, &granularity);
416 
417    return area->offset.x % granularity.width == 0 &&
418           area->offset.y % granularity.height == 0 &&
419          (area->extent.width % granularity.width == 0 ||
420           (fb->has_edge_padding &&
421            area->offset.x + area->extent.width >= fb->width)) &&
422          (area->extent.height % granularity.height == 0 ||
423           (fb->has_edge_padding &&
424            area->offset.y + area->extent.height >= fb->height));
425 }
426