/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"

/* emit 0, 0, 0, 1 */
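/* The resolve itself is done by the color backend: create_pipeline() below selects
 * the V_028808_CB_RESOLVE custom blend mode, which makes the CB average the samples
 * of attachment 0 (the source) into attachment 1 (the destination). The constant
 * value exported here is effectively a dummy; it does not land in the destination. */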
static nir_shader *
build_nir_fs(struct radv_device *dev)
{
   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *f_color; /* vec4, fragment output color */

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_resolve_fs");

   f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");
   f_color->data.location = FRAG_RESULT_DATA0;
   nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);

   return b.shader;
}

static VkResult
create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkFormat format,
                VkPipeline *pipeline)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);

   nir_shader *fs_module = build_nir_fs(device);
   if (!fs_module) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pSetLayouts = NULL,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   if (!device->meta_state.resolve.p_layout) {
      result =
         radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                   &device->meta_state.alloc, &device->meta_state.resolve.p_layout);
      if (result != VK_SUCCESS)
         goto cleanup;
   }

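   /* The HW resolve pipeline renders with two color attachments of the same format:
    * radv_meta_resolve_hardware_image() binds the multisampled source as attachment 0
    * and the single-sample destination as attachment 1. */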
   VkFormat color_formats[2] = { format, format };
   const VkPipelineRenderingCreateInfo rendering_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
      .colorAttachmentCount = 2,
      .pColorAttachmentFormats = color_formats,
   };

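   /* Radeon-specific create info: the meta vertex shader emits a rect-list primitive
    * covering the resolve area, and the CB_RESOLVE custom blend mode asks the color
    * backend to perform the MSAA resolve rather than blending the fragment output. */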
   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &(VkGraphicsPipelineCreateInfo){
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .pNext = &rendering_create_info,
         .stageCount = 2,
         .pStages =
            (VkPipelineShaderStageCreateInfo[]){
               {
                  .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                  .stage = VK_SHADER_STAGE_VERTEX_BIT,
                  .module = vs_module_h,
                  .pName = "main",
               },
               {
                  .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                  .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                  .module = vk_shader_module_handle_from_nir(fs_module),
                  .pName = "main",
               },
            },
         .pVertexInputState =
            &(VkPipelineVertexInputStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
               .vertexBindingDescriptionCount = 0,
               .vertexAttributeDescriptionCount = 0,
            },
         .pInputAssemblyState =
            &(VkPipelineInputAssemblyStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
               .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
               .primitiveRestartEnable = false,
            },
         .pViewportState =
            &(VkPipelineViewportStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
               .viewportCount = 1,
               .scissorCount = 1,
            },
         .pRasterizationState =
            &(VkPipelineRasterizationStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
               .depthClampEnable = false,
               .rasterizerDiscardEnable = false,
               .polygonMode = VK_POLYGON_MODE_FILL,
               .cullMode = VK_CULL_MODE_NONE,
               .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
            },
         .pMultisampleState =
            &(VkPipelineMultisampleStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
               .rasterizationSamples = 1,
               .sampleShadingEnable = false,
               .pSampleMask = NULL,
               .alphaToCoverageEnable = false,
               .alphaToOneEnable = false,
            },
         .pColorBlendState =
            &(VkPipelineColorBlendStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
               .logicOpEnable = false,
               .attachmentCount = 2,
               .pAttachments =
                  (VkPipelineColorBlendAttachmentState[]){
                     {
                        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
                     },
                     {
                        .colorWriteMask = 0,
                     }},
            },
         .pDynamicState =
            &(VkPipelineDynamicStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
               .dynamicStateCount = 2,
               .pDynamicStates =
                  (VkDynamicState[]){
                     VK_DYNAMIC_STATE_VIEWPORT,
                     VK_DYNAMIC_STATE_SCISSOR,
                  },
            },
         .layout = device->meta_state.resolve.p_layout,
         .renderPass = VK_NULL_HANDLE,
         .subpass = 0,
      },
      &(struct radv_graphics_pipeline_create_info){
         .use_rectlist = true,
         .custom_blend_mode = V_028808_CB_RESOLVE,
      },
      &device->meta_state.alloc, pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

   goto cleanup;

cleanup:
   ralloc_free(fs_module);
   return result;
}

void
radv_device_finish_meta_resolve_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j],
                           &state->alloc);
   }
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout,
                              &state->alloc);
}

VkResult
radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)
{
   if (on_demand)
      return VK_SUCCESS;

   VkResult res = VK_SUCCESS;
   struct radv_meta_state *state = &device->meta_state;
   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
   if (!vs_module) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {
      VkFormat format = radv_fs_key_format_exemplars[i];
      unsigned fs_key = radv_format_meta_fs_key(device, format);

      VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
      res = create_pipeline(device, vs_module_h, format, &state->resolve.pipeline[fs_key]);
      if (res != VK_SUCCESS)
         goto cleanup;
   }

cleanup:
   ralloc_free(vs_module);

   return res;
}

static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
             const struct radv_image *dst_image, VkFormat vk_format, const VkOffset2D *dest_offset,
             const VkExtent2D *resolve_extent)
{
   struct radv_device *device = cmd_buffer->device;
   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
   unsigned fs_key = radv_format_meta_fs_key(device, vk_format);

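   /* Flush so that any prior rendering to the source and any pending writes to the
    * destination are visible to the color backend before the resolve draw. */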
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, src_image) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT, src_image) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);

   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,
                        device->meta_state.resolve.pipeline[fs_key]);

   radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                       &(VkViewport){.x = dest_offset->x,
                                     .y = dest_offset->y,
                                     .width = resolve_extent->width,
                                     .height = resolve_extent->height,
                                     .minDepth = 0.0f,
                                     .maxDepth = 1.0f});

   radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                      &(VkRect2D){
                         .offset = *dest_offset,
                         .extent = *resolve_extent,
                      });

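   /* A single rect-list primitive (three vertices generated by the meta vertex shader)
    * covers the whole resolve area. */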
   radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, dst_image);
}

enum radv_resolve_method {
   RESOLVE_HW,
   RESOLVE_COMPUTE,
   RESOLVE_FRAGMENT,
};

static bool
image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,
                        struct radv_image *dst_image)
{
   if (device->physical_device->rad_info.gfx_level >= GFX9) {
      return dst_image->planes[0].surface.u.gfx9.swizzle_mode ==
             src_image->planes[0].surface.u.gfx9.swizzle_mode;
   } else {
      return dst_image->planes[0].surface.micro_tile_mode ==
             src_image->planes[0].surface.micro_tile_mode;
   }
}

static void
radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image,
                                VkFormat src_format, struct radv_image *dest_image,
                                unsigned dest_level, VkImageLayout dest_image_layout,
                                bool dest_render_loop, struct radv_cmd_buffer *cmd_buffer,
                                enum radv_resolve_method *method)
{
   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
                                                      cmd_buffer->qf);

   if (vk_format_is_color(src_format)) {
      /* Using the fragment resolve path is currently a hint to
       * avoid decompressing DCC for partial resolves and
       * re-initialize it after resolving using compute.
       * TODO: Add support for layered and int to the fragment path.
       */
      if (radv_layout_dcc_compressed(device, dest_image, dest_level, dest_image_layout,
                                     dest_render_loop, queue_mask)) {
         *method = RESOLVE_FRAGMENT;
      } else if (!image_hw_resolve_compat(device, src_image, dest_image)) {
         /* The micro tile mode only needs to match for the HW
          * resolve path which is the default path for non-DCC
          * resolves.
          */
         *method = RESOLVE_COMPUTE;
      }

      if (src_format == VK_FORMAT_R16G16_UNORM || src_format == VK_FORMAT_R16G16_SNORM)
         *method = RESOLVE_COMPUTE;
      else if (vk_format_is_int(src_format))
         *method = RESOLVE_COMPUTE;
      else if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
         *method = RESOLVE_COMPUTE;
   } else {
      if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)
         *method = RESOLVE_COMPUTE;
      else
         *method = RESOLVE_FRAGMENT;
   }
}

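/* On-demand creation of a HW resolve pipeline with double-checked locking: the
 * unlocked check handles the common case, and the check is repeated under the meta
 * state mutex before actually building the pipeline. */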
static VkResult
build_resolve_pipeline(struct radv_device *device, unsigned fs_key)
{
   VkResult result = VK_SUCCESS;

   if (device->meta_state.resolve.pipeline[fs_key])
      return result;

   mtx_lock(&device->meta_state.mtx);
   if (device->meta_state.resolve.pipeline[fs_key]) {
      mtx_unlock(&device->meta_state.mtx);
      return result;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);

   VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
   result = create_pipeline(device, vs_module_h, radv_fs_key_format_exemplars[fs_key],
                            &device->meta_state.resolve.pipeline[fs_key]);

   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

static void
radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
                                 VkImageLayout src_image_layout, struct radv_image *dst_image,
                                 VkImageLayout dst_image_layout, const VkImageResolve2 *region)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_saved_state saved_state;

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);

   assert(src_image->info.samples > 1);
   assert(dst_image->info.samples == 1);

   unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk.format);

   /* From the Vulkan 1.0 spec:
    *
    *    - The aspectMask member of srcSubresource and dstSubresource must
    *      only contain VK_IMAGE_ASPECT_COLOR_BIT
    *
    *    - The layerCount member of srcSubresource and dstSubresource must
    *      match
    */
   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);

   const uint32_t src_base_layer =
      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);

   const uint32_t dst_base_layer =
      radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);

   /**
    * From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images
    *
    *    srcOffset and dstOffset select the initial x, y, and z offsets in
    *    texels of the sub-regions of the source and destination image data.
    *    extent is the size in texels of the source image to resolve in width,
    *    height and depth. 1D images use only x and width. 2D images use x, y,
    *    width and height. 3D images use x, y, z, width, height and depth.
    */
   const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
   const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);

   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
                                                      cmd_buffer->qf);

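   /* If the destination may be DCC-compressed in this layout, reinitialize the DCC
    * metadata of the affected subresource before resolving into it. */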
   if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,
                                  dst_image_layout, false, queue_mask)) {
      VkImageSubresourceRange range = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .baseMipLevel = region->dstSubresource.mipLevel,
         .levelCount = 1,
         .baseArrayLayer = dst_base_layer,
         .layerCount = region->dstSubresource.layerCount,
      };

      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
   }

   for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
      VkResult ret = build_resolve_pipeline(device, fs_key);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         break;
      }

      struct radv_image_view src_iview;
      radv_image_view_init(&src_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(src_image),
                              .viewType = radv_meta_get_view_type(src_image),
                              .format = src_image->vk.format,
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->srcSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = src_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      struct radv_image_view dst_iview;
      radv_image_view_init(&dst_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(dst_image),
                              .viewType = radv_meta_get_view_type(dst_image),
                              .format = dst_image->vk.format,
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->dstSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = dst_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      const VkRenderingAttachmentInfo color_atts[2] = {
         {
            .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
            .imageView = radv_image_view_to_handle(&src_iview),
            .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
         },
         {
            .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
            .imageView = radv_image_view_to_handle(&dst_iview),
            .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
            .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
            .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
         },
      };

      const VkRenderingInfo rendering_info = {
         .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
         .renderArea = {
            .offset = { dstOffset.x, dstOffset.y },
            .extent = { extent.width, extent.height },
         },
         .layerCount = 1,
         .colorAttachmentCount = 2,
         .pColorAttachments = color_atts,
      };

      radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);

      emit_resolve(cmd_buffer, src_image, dst_image, dst_iview.vk.format,
                   &(VkOffset2D){
                      .x = dstOffset.x,
                      .y = dstOffset.y,
                   },
                   &(VkExtent2D){
                      .width = extent.width,
                      .height = extent.height,
                   });

      radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));

      radv_image_view_finish(&src_iview);
      radv_image_view_finish(&dst_iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);
}

static void
resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
              VkImageLayout src_image_layout, struct radv_image *dst_image,
              VkImageLayout dst_image_layout, const VkImageResolve2 *region,
              enum radv_resolve_method resolve_method)
{
   switch (resolve_method) {
   case RESOLVE_HW:
      radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image,
                                       dst_image_layout, region);
      break;
   case RESOLVE_FRAGMENT:
      radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image,
                                       dst_image_layout, region);
      break;
   case RESOLVE_COMPUTE:
      radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk.format, src_image_layout,
                                      dst_image, dst_image->vk.format, dst_image_layout, region);
      break;
   default:
      assert(!"Invalid resolve method selected");
   }
}

VKAPI_ATTR void VKAPI_CALL
radv_CmdResolveImage2(VkCommandBuffer commandBuffer,
                      const VkResolveImageInfo2 *pResolveImageInfo)
{
   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
   RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);
   RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);
   VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;
   VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;
   const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
   enum radv_resolve_method resolve_method =
      pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

   /* we can use the hw resolve only for single full resolves */
   if (pResolveImageInfo->regionCount == 1) {
      if (pResolveImageInfo->pRegions[0].srcOffset.x ||
          pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z)
         resolve_method = RESOLVE_COMPUTE;
      if (pResolveImageInfo->pRegions[0].dstOffset.x ||
          pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z)
         resolve_method = RESOLVE_COMPUTE;

      if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||
          pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||
          pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)
         resolve_method = RESOLVE_COMPUTE;
   } else
      resolve_method = RESOLVE_COMPUTE;

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      const VkImageResolve2 *region = &pResolveImageInfo->pRegions[r];

      radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk.format, dst_image,
                                      region->dstSubresource.mipLevel, dst_image_layout, false,
                                      cmd_buffer, &resolve_method);

      resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region,
                    resolve_method);
   }
}

static void
radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)
{
   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
      struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

      if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
         continue;

      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image *src_img = src_iview->image;

      struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
      struct radv_image *dst_img = dest_iview->image;
      VkImageLayout dst_image_layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;

      uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->qf,
                                                         cmd_buffer->qf);

      if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dest_iview->vk.base_mip_level,
                                     dst_image_layout, false, queue_mask)) {
         VkImageSubresourceRange range = {
            .aspectMask = dest_iview->vk.aspects,
            .baseMipLevel = dest_iview->vk.base_mip_level,
            .levelCount = dest_iview->vk.level_count,
            .baseArrayLayer = dest_iview->vk.base_array_layer,
            .layerCount = dest_iview->vk.layer_count,
         };

         cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);
         cmd_buffer->state.attachments[dest_att.attachment].current_layout =
            VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
      }

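      /* Switch to a temporary subpass that binds the source and the resolve destination
       * as the two color attachments the HW resolve pipeline expects. */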
      struct radv_subpass resolve_subpass = {
         .color_count = 2,
         .color_attachments = (struct radv_subpass_attachment[]){src_att, dest_att},
         .depth_stencil_attachment = NULL,
      };

      radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);

      VkResult ret = build_resolve_pipeline(
         cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk.format));
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         continue;
      }

      emit_resolve(cmd_buffer, src_img, dst_img, dest_iview->vk.format, &(VkOffset2D){0, 0},
                   &(VkExtent2D){fb->width, fb->height});

      radv_cmd_buffer_restore_subpass(cmd_buffer, subpass);
   }

   radv_meta_restore(&saved_state, cmd_buffer);
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)
{
   const struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   enum radv_resolve_method resolve_method =
      pdevice->rad_info.gfx_level >= GFX11 ? RESOLVE_FRAGMENT : RESOLVE_HW;

   if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)
      return;

   radv_describe_begin_render_pass_resolve(cmd_buffer);

   if (subpass->ds_resolve_attachment) {
      struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
      struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;
      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;

      /* Make sure to not clear the depth/stencil attachment after resolves. */
      cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;

      radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk.format,
                                      dst_iview->image, dst_iview->vk.base_mip_level, dst_att.layout,
                                      dst_att.in_render_loop, cmd_buffer, &resolve_method);

      if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
          subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE) {
         if (resolve_method == RESOLVE_FRAGMENT) {
            radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
                                                  subpass->depth_resolve_mode);
         } else {
            assert(resolve_method == RESOLVE_COMPUTE);
            radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,
                                                  subpass->depth_resolve_mode);
         }
      }

      if ((src_iview->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
          subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE) {
         if (resolve_method == RESOLVE_FRAGMENT) {
            radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
                                                  subpass->stencil_resolve_mode);
         } else {
            assert(resolve_method == RESOLVE_COMPUTE);
            radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,
                                                  subpass->stencil_resolve_mode);
         }
      }

      /* From the Vulkan spec 1.2.165:
       *
       * "VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT specifies
       *  write access to a color, resolve, or depth/stencil
       *  resolve attachment during a render pass or via
       *  certain subpass load and store operations."
       *
       * Yes, it's counterintuitive but it makes sense because ds
       * resolve operations happen late at the end of the subpass.
       *
       * That said, RADV is wrong because it executes the subpass
       * end barrier *before* any subpass resolves instead of after.
       *
       * TODO: Fix this properly by executing subpass end barriers
       * after subpass resolves.
       */
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
      if (radv_image_has_htile(dst_iview->image))
         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
   }

   if (subpass->has_color_resolve) {
      for (uint32_t i = 0; i < subpass->color_count; ++i) {
         struct radv_subpass_attachment src_att = subpass->color_attachments[i];
         struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

         if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
            continue;

         /* Make sure to not clear color attachments after resolves. */
         cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;

         struct radv_image_view *dst_iview =
            cmd_buffer->state.attachments[dest_att.attachment].iview;
         struct radv_image *dst_img = dst_iview->image;
         struct radv_image_view *src_iview =
            cmd_buffer->state.attachments[src_att.attachment].iview;
         struct radv_image *src_img = src_iview->image;

         radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk.format, dst_img,
                                         dst_iview->vk.base_mip_level, dest_att.layout,
                                         dest_att.in_render_loop, cmd_buffer, &resolve_method);

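         /* If any color attachment requires the fragment path, use that path for the
          * whole subpass rather than mixing methods per attachment. */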
         if (resolve_method == RESOLVE_FRAGMENT) {
            break;
         }
      }

      switch (resolve_method) {
      case RESOLVE_HW:
         radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);
         break;
      case RESOLVE_COMPUTE:
         radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);
         break;
      case RESOLVE_FRAGMENT:
         radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);
         break;
      default:
         unreachable("Invalid resolve method");
      }
   }

   radv_describe_end_render_pass_resolve(cmd_buffer);
}

/**
 * Decompress CMask/FMask before resolving a multisampled source image inside a
 * subpass.
 */
void
radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)
{
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
      struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];

      if (dest_att.attachment == VK_ATTACHMENT_UNUSED)
         continue;

      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image *src_image = src_iview->image;

      VkImageResolve2 region = {0};
      region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
      region.srcSubresource.aspectMask = src_iview->vk.aspects;
      region.srcSubresource.mipLevel = 0;
      region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
      region.srcSubresource.layerCount = layer_count;

      radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);
   }
}

static struct radv_sample_locations_state *
radv_get_resolve_sample_locations(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_cmd_state *state = &cmd_buffer->state;
   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);

   for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
      if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
         return &state->subpass_sample_locs[i].sample_location;
   }

   return NULL;
}

/**
 * Decompress CMask/FMask before resolving a multisampled source image.
 */
void
radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
                            VkImageLayout src_image_layout, const VkImageResolve2 *region)
{
   const uint32_t src_base_layer =
      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);

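   /* An internal barrier that transitions the source to TRANSFER_SRC_OPTIMAL; RADV's
    * layout-transition handling performs the needed FMASK/CMask decompression as part
    * of this barrier so the resolve path can read the samples directly. */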
   VkImageMemoryBarrier2 barrier = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER_2,
      .srcStageMask = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT,
      .srcAccessMask = VK_ACCESS_2_TRANSFER_WRITE_BIT,
      .dstStageMask = VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT,
      .dstAccessMask = VK_ACCESS_2_TRANSFER_READ_BIT,
      .oldLayout = src_image_layout,
      .newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
      .image = radv_image_to_handle(src_image),
      .subresourceRange = (VkImageSubresourceRange){
         .aspectMask = region->srcSubresource.aspectMask,
         .baseMipLevel = region->srcSubresource.mipLevel,
         .levelCount = 1,
         .baseArrayLayer = src_base_layer,
         .layerCount = region->srcSubresource.layerCount,
      }
   };

   /* Declared at function scope so the pointer stored in barrier.pNext stays valid
    * until the barrier is actually submitted below. */
   VkSampleLocationsInfoEXT sample_locs_info;

   if (src_image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {
      /* If the depth/stencil image uses different sample
       * locations, we need them during HTILE decompressions.
       */
      struct radv_sample_locations_state *sample_locs =
         radv_get_resolve_sample_locations(cmd_buffer);

      sample_locs_info = (VkSampleLocationsInfoEXT){
         .sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,
         .sampleLocationsPerPixel = sample_locs->per_pixel,
         .sampleLocationGridSize = sample_locs->grid_size,
         .sampleLocationsCount = sample_locs->count,
         .pSampleLocations = sample_locs->locations,
      };
      barrier.pNext = &sample_locs_info;
   }

   VkDependencyInfo dep_info = {
      .sType = VK_STRUCTURE_TYPE_DEPENDENCY_INFO,
      .imageMemoryBarrierCount = 1,
      .pImageMemoryBarriers = &barrier,
   };

   radv_CmdPipelineBarrier2(radv_cmd_buffer_to_handle(cmd_buffer), &dep_info);
}
862