/*
 * Copyright © 2024 Collabora Ltd.
 * Copyright © 2024 Arm Ltd.
 *
 * SPDX-License-Identifier: MIT
 */

#include "panvk_buffer.h"
#include "panvk_cmd_buffer.h"
#include "panvk_cmd_meta.h"
#include "panvk_entrypoints.h"

#include "pan_desc.h"

15 static void
render_state_set_color_attachment(struct panvk_cmd_buffer * cmdbuf,const VkRenderingAttachmentInfo * att,uint32_t index)16 render_state_set_color_attachment(struct panvk_cmd_buffer *cmdbuf,
17                                   const VkRenderingAttachmentInfo *att,
18                                   uint32_t index)
19 {
20    struct panvk_physical_device *phys_dev =
21          to_panvk_physical_device(cmdbuf->vk.base.device->physical);
22    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
23    struct pan_fb_info *fbinfo = &state->render.fb.info;
24    VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
25    struct panvk_image *img =
26       container_of(iview->vk.image, struct panvk_image, vk);
27 
28    state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_COLOR_BIT(index);
29    state->render.color_attachments.iviews[index] = iview;
30    state->render.color_attachments.fmts[index] = iview->vk.format;
31    state->render.color_attachments.samples[index] = img->vk.samples;
32 
33 #if PAN_ARCH <= 7
34    state->render.fb.bos[state->render.fb.bo_count++] = img->bo;
35 #endif
36 
37    fbinfo->rts[index].view = &iview->pview;
38    fbinfo->rts[index].crc_valid = &state->render.fb.crc_valid[index];
39    fbinfo->nr_samples =
40       MAX2(fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
41 
42    if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
43       enum pipe_format fmt = vk_format_to_pipe_format(iview->vk.format);
44       union pipe_color_union *col =
45          (union pipe_color_union *)&att->clearValue.color;
46 
47       fbinfo->rts[index].clear = true;
48       pan_pack_color(phys_dev->formats.blendable,
49                      fbinfo->rts[index].clear_value, col, fmt, false);
50    } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
51       fbinfo->rts[index].preload = true;
52    }
53 
54    if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
55       struct panvk_resolve_attachment *resolve_info =
56          &state->render.color_attachments.resolve[index];
57       VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
58 
59       resolve_info->mode = att->resolveMode;
60       resolve_info->dst_iview = resolve_iview;
61    }
62 }
63 
64 static void
render_state_set_z_attachment(struct panvk_cmd_buffer * cmdbuf,const VkRenderingAttachmentInfo * att)65 render_state_set_z_attachment(struct panvk_cmd_buffer *cmdbuf,
66                               const VkRenderingAttachmentInfo *att)
67 {
68    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
69    struct pan_fb_info *fbinfo = &state->render.fb.info;
70    VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
71    struct panvk_image *img =
72       container_of(iview->vk.image, struct panvk_image, vk);
73 
74 #if PAN_ARCH <= 7
75    state->render.fb.bos[state->render.fb.bo_count++] = img->bo;
76 #endif
77 
78    state->render.z_attachment.fmt = iview->vk.format;
79    state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_DEPTH_BIT;
80 
81    state->render.zs_pview = iview->pview;
82    fbinfo->zs.view.zs = &state->render.zs_pview;
83 
84    /* D32_S8 is a multiplanar format, so we need to adjust the format of the
85     * depth-only view to match the one of the depth plane.
86     */
87    if (iview->pview.format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
88       state->render.zs_pview.format = PIPE_FORMAT_Z32_FLOAT;
89 
90    state->render.zs_pview.planes[0] = &img->planes[0];
91    state->render.zs_pview.planes[1] = NULL;
92    fbinfo->nr_samples =
93       MAX2(fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
94    state->render.z_attachment.iview = iview;
95 
96    /* D24S8 is a single plane format where the depth/stencil are interleaved.
97     * If we touch the depth component, we need to make sure the stencil
98     * component is preserved, hence the preload, and the view format adjusment.
99     */
100    if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT) {
101       fbinfo->zs.preload.s = true;
102       cmdbuf->state.gfx.render.zs_pview.format =
103          PIPE_FORMAT_Z24_UNORM_S8_UINT;
104    } else {
105       state->render.zs_pview.format =
106          vk_format_to_pipe_format(vk_format_depth_only(img->vk.format));
107    }
108 
109    if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
110       fbinfo->zs.clear.z = true;
111       fbinfo->zs.clear_value.depth = att->clearValue.depthStencil.depth;
112    } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
113       fbinfo->zs.preload.z = true;
114    }
115 
116    if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
117       struct panvk_resolve_attachment *resolve_info =
118          &state->render.z_attachment.resolve;
119       VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
120 
121       resolve_info->mode = att->resolveMode;
122       resolve_info->dst_iview = resolve_iview;
123    }
124 }
125 
126 static void
render_state_set_s_attachment(struct panvk_cmd_buffer * cmdbuf,const VkRenderingAttachmentInfo * att)127 render_state_set_s_attachment(struct panvk_cmd_buffer *cmdbuf,
128                               const VkRenderingAttachmentInfo *att)
129 {
130    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
131    struct pan_fb_info *fbinfo = &state->render.fb.info;
132    VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
133    struct panvk_image *img =
134       container_of(iview->vk.image, struct panvk_image, vk);
135 
136 #if PAN_ARCH <= 7
137    state->render.fb.bos[state->render.fb.bo_count++] = img->bo;
138 #endif
139 
140    state->render.s_attachment.fmt = iview->vk.format;
141    state->render.bound_attachments |= MESA_VK_RP_ATTACHMENT_STENCIL_BIT;
142 
143    state->render.s_pview = iview->pview;
144    fbinfo->zs.view.s = &state->render.s_pview;
145 
146    /* D32_S8 is a multiplanar format, so we need to adjust the format of the
147     * stencil-only view to match the one of the stencil plane.
148     */
149    state->render.s_pview.format = img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT
150                                      ? PIPE_FORMAT_Z24_UNORM_S8_UINT
151                                      : PIPE_FORMAT_S8_UINT;
152    if (img->vk.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
153       state->render.s_pview.planes[0] = NULL;
154       state->render.s_pview.planes[1] = &img->planes[1];
155    } else {
156       state->render.s_pview.planes[0] = &img->planes[0];
157       state->render.s_pview.planes[1] = NULL;
158    }
159 
160    fbinfo->nr_samples =
161       MAX2(fbinfo->nr_samples, pan_image_view_get_nr_samples(&iview->pview));
162    state->render.s_attachment.iview = iview;
163 
164    /* If the depth and stencil attachments point to the same image,
165     * and the format is D24S8, we can combine them in a single view
166     * addressing both components.
167     */
168    if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
169        state->render.z_attachment.iview &&
170        state->render.z_attachment.iview->vk.image == iview->vk.image) {
171       state->render.zs_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
172       fbinfo->zs.preload.s = false;
173       fbinfo->zs.view.s = NULL;
174 
175    /* If there was no depth attachment, and the image format is D24S8,
176     * we use the depth+stencil slot, so we can benefit from AFBC, which
177     * is not supported on the stencil-only slot on Bifrost.
178     */
179    } else if (img->vk.format == VK_FORMAT_D24_UNORM_S8_UINT &&
180               fbinfo->zs.view.zs == NULL) {
181       fbinfo->zs.view.zs = &state->render.s_pview;
182       state->render.s_pview.format = PIPE_FORMAT_Z24_UNORM_S8_UINT;
183       fbinfo->zs.preload.z = true;
184       fbinfo->zs.view.s = NULL;
185    }
186 
187    if (att->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
188       fbinfo->zs.clear.s = true;
189       fbinfo->zs.clear_value.stencil = att->clearValue.depthStencil.stencil;
190    } else if (att->loadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {
191       fbinfo->zs.preload.s = true;
192    }
193 
194    if (att->resolveMode != VK_RESOLVE_MODE_NONE) {
195       struct panvk_resolve_attachment *resolve_info =
196          &state->render.s_attachment.resolve;
197       VK_FROM_HANDLE(panvk_image_view, resolve_iview, att->resolveImageView);
198 
199       resolve_info->mode = att->resolveMode;
200       resolve_info->dst_iview = resolve_iview;
201    }
202 }
203 
204 void
panvk_per_arch(cmd_init_render_state)205 panvk_per_arch(cmd_init_render_state)(struct panvk_cmd_buffer *cmdbuf,
206                                       const VkRenderingInfo *pRenderingInfo)
207 {
208    struct panvk_physical_device *phys_dev =
209          to_panvk_physical_device(cmdbuf->vk.base.device->physical);
210    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
211    struct pan_fb_info *fbinfo = &state->render.fb.info;
212    uint32_t att_width = 0, att_height = 0;
213 
214    state->render.flags = pRenderingInfo->flags;
215 
216    BITSET_SET(state->dirty, PANVK_CMD_GRAPHICS_DIRTY_RENDER_STATE);
217 
218 #if PAN_ARCH <= 7
219    state->render.fb.bo_count = 0;
220    memset(state->render.fb.bos, 0, sizeof(state->render.fb.bos));
221 #endif
222 
223    memset(state->render.fb.crc_valid, 0, sizeof(state->render.fb.crc_valid));
224    memset(&state->render.color_attachments, 0,
225           sizeof(state->render.color_attachments));
226    memset(&state->render.z_attachment, 0, sizeof(state->render.z_attachment));
227    memset(&state->render.s_attachment, 0, sizeof(state->render.s_attachment));
228    state->render.bound_attachments = 0;
229 
230    cmdbuf->state.gfx.render.layer_count = pRenderingInfo->viewMask ?
231       util_last_bit(pRenderingInfo->viewMask) :
232       pRenderingInfo->layerCount;
233    cmdbuf->state.gfx.render.view_mask = pRenderingInfo->viewMask;
234    *fbinfo = (struct pan_fb_info){
235       .tile_buf_budget = panfrost_query_optimal_tib_size(phys_dev->model),
236       .nr_samples = 1,
237       .rt_count = pRenderingInfo->colorAttachmentCount,
238    };
239 
240    assert(pRenderingInfo->colorAttachmentCount <= ARRAY_SIZE(fbinfo->rts));
241 
242    for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
243       const VkRenderingAttachmentInfo *att =
244          &pRenderingInfo->pColorAttachments[i];
245       VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
246 
247       if (!iview)
248          continue;
249 
250       render_state_set_color_attachment(cmdbuf, att, i);
251       att_width = MAX2(iview->vk.extent.width, att_width);
252       att_height = MAX2(iview->vk.extent.height, att_height);
253    }
254 
255    if (pRenderingInfo->pDepthAttachment &&
256        pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE) {
257       const VkRenderingAttachmentInfo *att = pRenderingInfo->pDepthAttachment;
258       VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
259 
260       if (iview) {
261          assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
262          render_state_set_z_attachment(cmdbuf, att);
263          att_width = MAX2(iview->vk.extent.width, att_width);
264          att_height = MAX2(iview->vk.extent.height, att_height);
265       }
266    }
267 
268    if (pRenderingInfo->pStencilAttachment &&
269        pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE) {
270       const VkRenderingAttachmentInfo *att = pRenderingInfo->pStencilAttachment;
271       VK_FROM_HANDLE(panvk_image_view, iview, att->imageView);
272 
273       if (iview) {
274          assert(iview->vk.image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
275          render_state_set_s_attachment(cmdbuf, att);
276          att_width = MAX2(iview->vk.extent.width, att_width);
277          att_height = MAX2(iview->vk.extent.height, att_height);
278       }
279    }
280 
281    fbinfo->extent.minx = pRenderingInfo->renderArea.offset.x;
282    fbinfo->extent.maxx = pRenderingInfo->renderArea.offset.x +
283                          pRenderingInfo->renderArea.extent.width - 1;
284    fbinfo->extent.miny = pRenderingInfo->renderArea.offset.y;
285    fbinfo->extent.maxy = pRenderingInfo->renderArea.offset.y +
286                          pRenderingInfo->renderArea.extent.height - 1;
287 
288    if (state->render.bound_attachments) {
289       fbinfo->width = att_width;
290       fbinfo->height = att_height;
291    } else {
292       fbinfo->width = fbinfo->extent.maxx + 1;
293       fbinfo->height = fbinfo->extent.maxy + 1;
294    }
295 
296    assert(fbinfo->width && fbinfo->height);
297 
298    GENX(pan_select_tile_size)(fbinfo);
299 }
300 
301 void
panvk_per_arch(cmd_resolve_attachments)302 panvk_per_arch(cmd_resolve_attachments)(struct panvk_cmd_buffer *cmdbuf)
303 {
304    struct pan_fb_info *fbinfo = &cmdbuf->state.gfx.render.fb.info;
305    bool needs_resolve = false;
306 
307    unsigned bound_atts = cmdbuf->state.gfx.render.bound_attachments;
308    unsigned color_att_count =
309       util_last_bit(bound_atts & MESA_VK_RP_ATTACHMENT_ANY_COLOR_BITS);
310    VkRenderingAttachmentInfo color_atts[MAX_RTS];
311    for (uint32_t i = 0; i < color_att_count; i++) {
312       const struct panvk_resolve_attachment *resolve_info =
313          &cmdbuf->state.gfx.render.color_attachments.resolve[i];
314       struct panvk_image_view *src_iview =
315          cmdbuf->state.gfx.render.color_attachments.iviews[i];
316 
317       color_atts[i] = (VkRenderingAttachmentInfo){
318          .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
319          .imageView = panvk_image_view_to_handle(src_iview),
320          .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
321          .resolveMode = resolve_info->mode,
322          .resolveImageView =
323             panvk_image_view_to_handle(resolve_info->dst_iview),
324          .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
325       };
326 
327       if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
328          needs_resolve = true;
329    }
330 
331    const struct panvk_resolve_attachment *resolve_info =
332       &cmdbuf->state.gfx.render.z_attachment.resolve;
333    struct panvk_image_view *src_iview =
334       cmdbuf->state.gfx.render.z_attachment.iview;
335    VkRenderingAttachmentInfo z_att = {
336       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
337       .imageView = panvk_image_view_to_handle(src_iview),
338       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
339       .resolveMode = resolve_info->mode,
340       .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
341       .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
342    };
343 
344    if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
345       needs_resolve = true;
346 
347    resolve_info = &cmdbuf->state.gfx.render.s_attachment.resolve;
348    src_iview = cmdbuf->state.gfx.render.s_attachment.iview;
349 
350    VkRenderingAttachmentInfo s_att = {
351       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
352       .imageView = panvk_image_view_to_handle(src_iview),
353       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
354       .resolveMode = resolve_info->mode,
355       .resolveImageView = panvk_image_view_to_handle(resolve_info->dst_iview),
356       .resolveImageLayout = VK_IMAGE_LAYOUT_GENERAL,
357    };
358 
359    if (resolve_info->mode != VK_RESOLVE_MODE_NONE)
360       needs_resolve = true;
361 
362    if (!needs_resolve)
363       return;
364 
365 #if PAN_ARCH >= 10
366    /* insert a barrier for resolve */
367    const VkMemoryBarrier2 mem_barrier = {
368       .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
369       .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
370                       VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
371                       VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
372       .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
373                        VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
374       .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
375       .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT
376    };
377    const VkDependencyInfo dep_info = {
378       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
379       .memoryBarrierCount = 1,
380       .pMemoryBarriers = &mem_barrier,
381    };
382    panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
383                                        &dep_info);
384 #endif
385 
386    const VkRenderingInfo render_info = {
387       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
388       .renderArea =
389          {
390             .offset.x = fbinfo->extent.minx,
391             .offset.y = fbinfo->extent.miny,
392             .extent.width = fbinfo->extent.maxx - fbinfo->extent.minx + 1,
393             .extent.height = fbinfo->extent.maxy - fbinfo->extent.miny + 1,
394          },
395       .layerCount = cmdbuf->state.gfx.render.layer_count,
396       .viewMask = cmdbuf->state.gfx.render.view_mask,
397       .colorAttachmentCount = color_att_count,
398       .pColorAttachments = color_atts,
399       .pDepthAttachment = &z_att,
400       .pStencilAttachment = &s_att,
401    };
402 
403    struct panvk_device *dev = to_panvk_device(cmdbuf->vk.base.device);
404    struct panvk_cmd_meta_graphics_save_ctx save = {0};
405 
406    panvk_per_arch(cmd_meta_gfx_start)(cmdbuf, &save);
407    vk_meta_resolve_rendering(&cmdbuf->vk, &dev->meta, &render_info);
408    panvk_per_arch(cmd_meta_gfx_end)(cmdbuf, &save);
409 }
410 
411 void
panvk_per_arch(cmd_force_fb_preload)412 panvk_per_arch(cmd_force_fb_preload)(struct panvk_cmd_buffer *cmdbuf,
413                                      const VkRenderingInfo *render_info)
414 {
415    /* We force preloading for all active attachments when the render area is
416     * unaligned or when a barrier flushes prior draw calls in the middle of a
417     * render pass.  The two cases can be distinguished by whether a
418     * render_info is provided.
419     *
420     * When the render area is unaligned, we force preloading to preserve
421     * contents falling outside of the render area.  We also make sure the
422     * initial attachment clears are performed.
423     */
424    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
425    struct pan_fb_info *fbinfo = &state->render.fb.info;
426    VkClearAttachment clear_atts[MAX_RTS + 2];
427    uint32_t clear_att_count = 0;
428 
429    if (!state->render.bound_attachments)
430       return;
431 
432    for (unsigned i = 0; i < fbinfo->rt_count; i++) {
433       if (!fbinfo->rts[i].view)
434          continue;
435 
436       fbinfo->rts[i].preload = true;
437 
438       if (fbinfo->rts[i].clear) {
439          if (render_info) {
440             const VkRenderingAttachmentInfo *att =
441                &render_info->pColorAttachments[i];
442 
443             clear_atts[clear_att_count++] = (VkClearAttachment){
444                .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
445                .colorAttachment = i,
446                .clearValue = att->clearValue,
447             };
448          }
449          fbinfo->rts[i].clear = false;
450       }
451    }
452 
453    if (fbinfo->zs.view.zs) {
454       fbinfo->zs.preload.z = true;
455 
456       if (fbinfo->zs.clear.z) {
457          if (render_info) {
458             const VkRenderingAttachmentInfo *att =
459                render_info->pDepthAttachment;
460 
461             clear_atts[clear_att_count++] = (VkClearAttachment){
462                .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
463                .clearValue = att->clearValue,
464             };
465          }
466          fbinfo->zs.clear.z = false;
467       }
468    }
469 
470    if (fbinfo->zs.view.s ||
471        (fbinfo->zs.view.zs &&
472         util_format_is_depth_and_stencil(fbinfo->zs.view.zs->format))) {
473       fbinfo->zs.preload.s = true;
474 
475       if (fbinfo->zs.clear.s) {
476          if (render_info) {
477             const VkRenderingAttachmentInfo *att =
478                render_info->pStencilAttachment;
479 
480             clear_atts[clear_att_count++] = (VkClearAttachment){
481                .aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT,
482                .clearValue = att->clearValue,
483             };
484          }
485 
486          fbinfo->zs.clear.s = false;
487       }
488    }
489 
490 #if PAN_ARCH >= 10
491    /* insert a barrier for preload */
492    const VkMemoryBarrier2 mem_barrier = {
493       .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER_2,
494       .srcStageMask = VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
495                       VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT |
496                       VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
497       .srcAccessMask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
498                        VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
499       .dstStageMask = VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT,
500       .dstAccessMask = VK_ACCESS_2_SHADER_SAMPLED_READ_BIT,
501    };
502    const VkDependencyInfo dep_info = {
503       .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
504       .memoryBarrierCount = 1,
505       .pMemoryBarriers = &mem_barrier,
506    };
507    panvk_per_arch(CmdPipelineBarrier2)(panvk_cmd_buffer_to_handle(cmdbuf),
508                                        &dep_info);
509 #endif
510 
511    if (clear_att_count && render_info) {
512       VkClearRect clear_rect = {
513          .rect = render_info->renderArea,
514          .baseArrayLayer = 0,
515          .layerCount = render_info->viewMask ? 1 : render_info->layerCount,
516       };
517 
518       panvk_per_arch(CmdClearAttachments)(panvk_cmd_buffer_to_handle(cmdbuf),
519                                           clear_att_count, clear_atts, 1,
520                                           &clear_rect);
521    }
522 }
523 
524 void
panvk_per_arch(cmd_preload_render_area_border)525 panvk_per_arch(cmd_preload_render_area_border)(
526    struct panvk_cmd_buffer *cmdbuf, const VkRenderingInfo *render_info)
527 {
528    struct panvk_cmd_graphics_state *state = &cmdbuf->state.gfx;
529    struct pan_fb_info *fbinfo = &state->render.fb.info;
530    bool render_area_is_32x32_aligned =
531       ((fbinfo->extent.minx | fbinfo->extent.miny) % 32) == 0 &&
532       (fbinfo->extent.maxx + 1 == fbinfo->width ||
533        (fbinfo->extent.maxx % 32) == 31) &&
534       (fbinfo->extent.maxy + 1 == fbinfo->height ||
535        (fbinfo->extent.maxy % 32) == 31);
536 
537    /* If the render area is aligned on a 32x32 section, we're good. */
538    if (!render_area_is_32x32_aligned)
539       panvk_per_arch(cmd_force_fb_preload)(cmdbuf, render_info);
540 }
541 
/* Lower bound applied to the magnitude of the viewport depth scale when
 * depth clipping is enabled.
 *
 * This value has been selected to get
 * dEQP-VK.draw.renderpass.inverted_depth_ranges.nodepthclamp_deltazero passing.
 */
#define MIN_DEPTH_CLIP_RANGE 37.7E-06f

547 void
panvk_per_arch(cmd_prepare_draw_sysvals)548 panvk_per_arch(cmd_prepare_draw_sysvals)(struct panvk_cmd_buffer *cmdbuf,
549                                          const struct panvk_draw_info *info)
550 {
551    struct vk_color_blend_state *cb = &cmdbuf->vk.dynamic_graphics_state.cb;
552    const struct panvk_shader *fs = get_fs(cmdbuf);
553    uint32_t noperspective_varyings = fs ? fs->info.varyings.noperspective : 0;
554    BITSET_DECLARE(dirty_sysvals, MAX_SYSVAL_FAUS) = {0};
555 
556    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.noperspective_varyings,
557                   noperspective_varyings);
558    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.first_vertex, info->vertex.base);
559    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.base_instance, info->instance.base);
560 
561 #if PAN_ARCH <= 7
562    set_gfx_sysval(cmdbuf, dirty_sysvals, vs.raw_vertex_offset,
563                   info->vertex.raw_offset);
564    set_gfx_sysval(cmdbuf, dirty_sysvals, layer_id, info->layer_id);
565 #endif
566 
567    if (dyn_gfx_state_dirty(cmdbuf, CB_BLEND_CONSTANTS)) {
568       for (unsigned i = 0; i < ARRAY_SIZE(cb->blend_constants); i++) {
569          set_gfx_sysval(cmdbuf, dirty_sysvals, blend.constants[i],
570                         CLAMP(cb->blend_constants[i], 0.0f, 1.0f));
571       }
572    }
573 
574    if (dyn_gfx_state_dirty(cmdbuf, VP_VIEWPORTS) ||
575        dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLIP_ENABLE) ||
576        dyn_gfx_state_dirty(cmdbuf, RS_DEPTH_CLAMP_ENABLE)) {
577       VkViewport *viewport = &cmdbuf->vk.dynamic_graphics_state.vp.viewports[0];
578 
579       /* Upload the viewport scale. Defined as (px/2, py/2, pz) at the start of
580        * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
581        * end of the section, the spec defines:
582        *
583        * px = width
584        * py = height
585        * pz = maxDepth - minDepth
586        */
587       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.x,
588                      0.5f * viewport->width);
589       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.y,
590                      0.5f * viewport->height);
591       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.z,
592                      (viewport->maxDepth - viewport->minDepth));
593 
594       /* Upload the viewport offset. Defined as (ox, oy, oz) at the start of
595        * section 24.5 ("Controlling the Viewport") of the Vulkan spec. At the
596        * end of the section, the spec defines:
597        *
598        * ox = x + width/2
599        * oy = y + height/2
600        * oz = minDepth
601        */
602       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.x,
603                      (0.5f * viewport->width) + viewport->x);
604       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.y,
605                      (0.5f * viewport->height) + viewport->y);
606       set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.z,
607                      viewport->minDepth);
608 
609       /* Doing the viewport transform in the vertex shader and then depth
610        * clipping with the viewport depth range gets a similar result to
611        * clipping in clip-space, but loses precision when the viewport depth
612        * range is very small. When minDepth == maxDepth, this completely
613        * flattens the clip-space depth and results in never clipping.
614        *
615        * To work around this, set a lower limit on depth range when clipping is
616        * enabled. This results in slightly incorrect fragment depth values, and
617        * doesn't help with the precision loss, but at least clipping isn't
618        * completely broken.
619        */
620       const struct panvk_graphics_sysvals *sysvals = &cmdbuf->state.gfx.sysvals;
621       const struct vk_rasterization_state *rs =
622          &cmdbuf->vk.dynamic_graphics_state.rs;
623 
624       if (vk_rasterization_state_depth_clip_enable(rs) &&
625           fabsf(sysvals->viewport.scale.z) < MIN_DEPTH_CLIP_RANGE) {
626          float z_min = viewport->minDepth;
627          float z_max = viewport->maxDepth;
628          float z_sign = z_min <= z_max ? 1.0f : -1.0f;
629 
630          set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.scale.z,
631                         z_sign * MIN_DEPTH_CLIP_RANGE);
632 
633          /* Middle of the user range is
634          *    z_range_center = z_min + (z_max - z_min) * 0.5f,
635          * and we want to set the offset to
636          *    z_offset = z_range_center - viewport.scale.z * 0.5f
637          * which, when expanding, gives us
638          *    z_offset = (z_max + z_min - viewport.scale.z) * 0.5f
639          */
640          float z_offset = (z_max + z_min - sysvals->viewport.scale.z) * 0.5f;
641          /* Bump offset off-center if necessary, to not go out of range */
642          set_gfx_sysval(cmdbuf, dirty_sysvals, viewport.offset.z,
643                         CLAMP(z_offset, 0.0f, 1.0f));
644       }
645    }
646 
647    const struct panvk_shader *vs = cmdbuf->state.gfx.vs.shader;
648 
649 #if PAN_ARCH <= 7
650    struct panvk_descriptor_state *desc_state = &cmdbuf->state.gfx.desc_state;
651    struct panvk_shader_desc_state *vs_desc_state = &cmdbuf->state.gfx.vs.desc;
652    struct panvk_shader_desc_state *fs_desc_state = &cmdbuf->state.gfx.fs.desc;
653 
654    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, VS)) {
655       set_gfx_sysval(cmdbuf, dirty_sysvals,
656                      desc.sets[PANVK_DESC_TABLE_VS_DYN_SSBOS],
657                      vs_desc_state->dyn_ssbos);
658    }
659 
660    if (gfx_state_dirty(cmdbuf, DESC_STATE) || gfx_state_dirty(cmdbuf, FS)) {
661       set_gfx_sysval(cmdbuf, dirty_sysvals,
662                      desc.sets[PANVK_DESC_TABLE_FS_DYN_SSBOS],
663                      fs_desc_state->dyn_ssbos);
664    }
665 
666    for (uint32_t i = 0; i < MAX_SETS; i++) {
667       uint32_t used_set_mask =
668          vs->desc_info.used_set_mask | (fs ? fs->desc_info.used_set_mask : 0);
669 
670       if (used_set_mask & BITFIELD_BIT(i)) {
671          set_gfx_sysval(cmdbuf, dirty_sysvals, desc.sets[i],
672                         desc_state->sets[i]->descs.dev);
673       }
674    }
675 #endif
676 
677    /* We mask the dirty sysvals by the shader usage, and only flag
678     * the push uniforms dirty if those intersect. */
679    BITSET_DECLARE(dirty_shader_sysvals, MAX_SYSVAL_FAUS);
680    BITSET_AND(dirty_shader_sysvals, dirty_sysvals, vs->fau.used_sysvals);
681    if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
682       gfx_state_set_dirty(cmdbuf, VS_PUSH_UNIFORMS);
683 
684    if (fs) {
685       BITSET_AND(dirty_shader_sysvals, dirty_sysvals, fs->fau.used_sysvals);
686 
687       /* If blend constants are not read by the blend shader, we can consider
688        * they are not read at all, so clear the dirty bits to avoid re-emitting
689        * FAUs when we can. */
690       if (!cmdbuf->state.gfx.cb.info.shader_loads_blend_const)
691          BITSET_CLEAR_RANGE(dirty_shader_sysvals, 0, 3);
692 
693       if (!BITSET_IS_EMPTY(dirty_shader_sysvals))
694          gfx_state_set_dirty(cmdbuf, FS_PUSH_UNIFORMS);
695    }
696 }
697 
698 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindVertexBuffers)699 panvk_per_arch(CmdBindVertexBuffers)(VkCommandBuffer commandBuffer,
700                                      uint32_t firstBinding,
701                                      uint32_t bindingCount,
702                                      const VkBuffer *pBuffers,
703                                      const VkDeviceSize *pOffsets)
704 {
705    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
706 
707    assert(firstBinding + bindingCount <= MAX_VBS);
708 
709    for (uint32_t i = 0; i < bindingCount; i++) {
710       VK_FROM_HANDLE(panvk_buffer, buffer, pBuffers[i]);
711 
712       cmdbuf->state.gfx.vb.bufs[firstBinding + i].address =
713          panvk_buffer_gpu_ptr(buffer, pOffsets[i]);
714       cmdbuf->state.gfx.vb.bufs[firstBinding + i].size =
715          panvk_buffer_range(buffer, pOffsets[i], VK_WHOLE_SIZE);
716    }
717 
718    cmdbuf->state.gfx.vb.count =
719       MAX2(cmdbuf->state.gfx.vb.count, firstBinding + bindingCount);
720    gfx_state_set_dirty(cmdbuf, VB);
721 }
722 
723 VKAPI_ATTR void VKAPI_CALL
panvk_per_arch(CmdBindIndexBuffer)724 panvk_per_arch(CmdBindIndexBuffer)(VkCommandBuffer commandBuffer,
725                                    VkBuffer buffer, VkDeviceSize offset,
726                                    VkIndexType indexType)
727 {
728    VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
729    VK_FROM_HANDLE(panvk_buffer, buf, buffer);
730 
731    cmdbuf->state.gfx.ib.buffer = buf;
732    cmdbuf->state.gfx.ib.offset = offset;
733    cmdbuf->state.gfx.ib.index_size = vk_index_type_to_bytes(indexType);
734    gfx_state_set_dirty(cmdbuf, IB);
735 }
736