• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30 
31 enum radv_color_op {
32    FAST_CLEAR_ELIMINATE,
33    FMASK_DECOMPRESS,
34    DCC_DECOMPRESS,
35 };
36 
37 static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device * dev)38 build_dcc_decompress_compute_shader(struct radv_device *dev)
39 {
40    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
41 
42    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_decompress_compute");
43 
44    /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
45    b.shader->info.workgroup_size[0] = 16;
46    b.shader->info.workgroup_size[1] = 16;
47    nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
48    input_img->data.descriptor_set = 0;
49    input_img->data.binding = 0;
50 
51    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
52    output_img->data.descriptor_set = 0;
53    output_img->data.binding = 1;
54 
55    nir_def *global_id = get_global_ids(&b, 2);
56    nir_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), nir_undef(&b, 1, 32),
57                                  nir_undef(&b, 1, 32));
58 
59    nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, img_coord,
60                                         nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
61 
62    /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
63     * creating a vmcnt(0) because it expects the L1 cache to keep memory
64     * operations in-order for the same workgroup. The vmcnt(0) seems
65     * necessary however. */
66    nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
67                .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
68 
69    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), data,
70                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
71    return b.shader;
72 }
73 
74 static VkResult
create_dcc_compress_compute(struct radv_device * device)75 create_dcc_compress_compute(struct radv_device *device)
76 {
77    VkResult result = VK_SUCCESS;
78    nir_shader *cs = build_dcc_decompress_compute_shader(device);
79 
80    VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
81                                                      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
82                                                      .bindingCount = 2,
83                                                      .pBindings = (VkDescriptorSetLayoutBinding[]){
84                                                         {.binding = 0,
85                                                          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
86                                                          .descriptorCount = 1,
87                                                          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
88                                                          .pImmutableSamplers = NULL},
89                                                         {.binding = 1,
90                                                          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
91                                                          .descriptorCount = 1,
92                                                          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
93                                                          .pImmutableSamplers = NULL},
94                                                      }};
95 
96    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
97                                            &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
98    if (result != VK_SUCCESS)
99       goto cleanup;
100 
101    VkPipelineLayoutCreateInfo pl_create_info = {
102       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
103       .setLayoutCount = 1,
104       .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
105       .pushConstantRangeCount = 0,
106       .pPushConstantRanges = NULL,
107    };
108 
109    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
110                                       &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
111    if (result != VK_SUCCESS)
112       goto cleanup;
113 
114    /* compute shader */
115 
116    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
117       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
118       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
119       .module = vk_shader_module_handle_from_nir(cs),
120       .pName = "main",
121       .pSpecializationInfo = NULL,
122    };
123 
124    VkComputePipelineCreateInfo vk_pipeline_info = {
125       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
126       .stage = pipeline_shader_stage,
127       .flags = 0,
128       .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
129    };
130 
131    result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
132                                          NULL, &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
133    if (result != VK_SUCCESS)
134       goto cleanup;
135 
136 cleanup:
137    ralloc_free(cs);
138    return result;
139 }
140 
141 static VkResult
create_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout)142 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
143 {
144    VkPipelineLayoutCreateInfo pl_create_info = {
145       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
146       .setLayoutCount = 0,
147       .pSetLayouts = NULL,
148       .pushConstantRangeCount = 0,
149       .pPushConstantRanges = NULL,
150    };
151 
152    return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout);
153 }
154 
155 static VkResult
create_pipeline(struct radv_device * device,VkShaderModule vs_module_h,VkPipelineLayout layout)156 create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
157 {
158    VkResult result;
159    VkDevice device_h = radv_device_to_handle(device);
160 
161    nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
162 
163    if (!fs_module) {
164       /* XXX: Need more accurate error */
165       result = VK_ERROR_OUT_OF_HOST_MEMORY;
166       goto cleanup;
167    }
168 
169    const VkPipelineShaderStageCreateInfo stages[2] = {
170       {
171          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
172          .stage = VK_SHADER_STAGE_VERTEX_BIT,
173          .module = vs_module_h,
174          .pName = "main",
175       },
176       {
177          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
178          .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
179          .module = vk_shader_module_handle_from_nir(fs_module),
180          .pName = "main",
181       },
182    };
183 
184    const VkPipelineVertexInputStateCreateInfo vi_state = {
185       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
186       .vertexBindingDescriptionCount = 0,
187       .vertexAttributeDescriptionCount = 0,
188    };
189 
190    const VkPipelineInputAssemblyStateCreateInfo ia_state = {
191       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
192       .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
193       .primitiveRestartEnable = false,
194    };
195 
196    const VkPipelineColorBlendStateCreateInfo blend_state = {
197       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
198       .logicOpEnable = false,
199       .attachmentCount = 1,
200       .pAttachments = (VkPipelineColorBlendAttachmentState[]){
201          {
202             .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
203                               VK_COLOR_COMPONENT_A_BIT,
204          },
205       }};
206    const VkPipelineRasterizationStateCreateInfo rs_state = {
207       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
208       .depthClampEnable = false,
209       .rasterizerDiscardEnable = false,
210       .polygonMode = VK_POLYGON_MODE_FILL,
211       .cullMode = VK_CULL_MODE_NONE,
212       .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
213    };
214 
215    const VkFormat color_format = VK_FORMAT_R8_UNORM;
216    const VkPipelineRenderingCreateInfo rendering_create_info = {
217       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
218       .colorAttachmentCount = 1,
219       .pColorAttachmentFormats = &color_format,
220    };
221 
222    result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
223                                           &(VkGraphicsPipelineCreateInfo){
224                                              .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
225                                              .pNext = &rendering_create_info,
226                                              .stageCount = 2,
227                                              .pStages = stages,
228 
229                                              .pVertexInputState = &vi_state,
230                                              .pInputAssemblyState = &ia_state,
231 
232                                              .pViewportState =
233                                                 &(VkPipelineViewportStateCreateInfo){
234                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
235                                                    .viewportCount = 1,
236                                                    .scissorCount = 1,
237                                                 },
238                                              .pRasterizationState = &rs_state,
239                                              .pMultisampleState =
240                                                 &(VkPipelineMultisampleStateCreateInfo){
241                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
242                                                    .rasterizationSamples = 1,
243                                                    .sampleShadingEnable = false,
244                                                    .pSampleMask = NULL,
245                                                    .alphaToCoverageEnable = false,
246                                                    .alphaToOneEnable = false,
247                                                 },
248                                              .pColorBlendState = &blend_state,
249                                              .pDynamicState =
250                                                 &(VkPipelineDynamicStateCreateInfo){
251                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
252                                                    .dynamicStateCount = 2,
253                                                    .pDynamicStates =
254                                                       (VkDynamicState[]){
255                                                          VK_DYNAMIC_STATE_VIEWPORT,
256                                                          VK_DYNAMIC_STATE_SCISSOR,
257                                                       },
258                                                 },
259                                              .layout = layout,
260                                              .renderPass = VK_NULL_HANDLE,
261                                              .subpass = 0,
262                                           },
263                                           &(struct radv_graphics_pipeline_create_info){
264                                              .use_rectlist = true,
265                                              .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
266                                           },
267                                           &device->meta_state.alloc,
268                                           &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
269    if (result != VK_SUCCESS)
270       goto cleanup;
271 
272    result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
273                                           &(VkGraphicsPipelineCreateInfo){
274                                              .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
275                                              .pNext = &rendering_create_info,
276                                              .stageCount = 2,
277                                              .pStages = stages,
278 
279                                              .pVertexInputState = &vi_state,
280                                              .pInputAssemblyState = &ia_state,
281 
282                                              .pViewportState =
283                                                 &(VkPipelineViewportStateCreateInfo){
284                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
285                                                    .viewportCount = 1,
286                                                    .scissorCount = 1,
287                                                 },
288                                              .pRasterizationState = &rs_state,
289                                              .pMultisampleState =
290                                                 &(VkPipelineMultisampleStateCreateInfo){
291                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
292                                                    .rasterizationSamples = 1,
293                                                    .sampleShadingEnable = false,
294                                                    .pSampleMask = NULL,
295                                                    .alphaToCoverageEnable = false,
296                                                    .alphaToOneEnable = false,
297                                                 },
298                                              .pColorBlendState = &blend_state,
299                                              .pDynamicState =
300                                                 &(VkPipelineDynamicStateCreateInfo){
301                                                    .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
302                                                    .dynamicStateCount = 2,
303                                                    .pDynamicStates =
304                                                       (VkDynamicState[]){
305                                                          VK_DYNAMIC_STATE_VIEWPORT,
306                                                          VK_DYNAMIC_STATE_SCISSOR,
307                                                       },
308                                                 },
309                                              .layout = layout,
310                                              .renderPass = VK_NULL_HANDLE,
311                                              .subpass = 0,
312                                           },
313                                           &(struct radv_graphics_pipeline_create_info){
314                                              .use_rectlist = true,
315                                              .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
316                                           },
317                                           &device->meta_state.alloc,
318                                           &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
319    if (result != VK_SUCCESS)
320       goto cleanup;
321 
322    result = radv_graphics_pipeline_create(
323       device_h, device->meta_state.cache,
324       &(VkGraphicsPipelineCreateInfo){
325          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
326          .pNext = &rendering_create_info,
327          .stageCount = 2,
328          .pStages = stages,
329 
330          .pVertexInputState = &vi_state,
331          .pInputAssemblyState = &ia_state,
332 
333          .pViewportState =
334             &(VkPipelineViewportStateCreateInfo){
335                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
336                .viewportCount = 1,
337                .scissorCount = 1,
338             },
339          .pRasterizationState = &rs_state,
340          .pMultisampleState =
341             &(VkPipelineMultisampleStateCreateInfo){
342                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
343                .rasterizationSamples = 1,
344                .sampleShadingEnable = false,
345                .pSampleMask = NULL,
346                .alphaToCoverageEnable = false,
347                .alphaToOneEnable = false,
348             },
349          .pColorBlendState = &blend_state,
350          .pDynamicState =
351             &(VkPipelineDynamicStateCreateInfo){
352                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
353                .dynamicStateCount = 2,
354                .pDynamicStates =
355                   (VkDynamicState[]){
356                      VK_DYNAMIC_STATE_VIEWPORT,
357                      VK_DYNAMIC_STATE_SCISSOR,
358                   },
359             },
360          .layout = layout,
361          .renderPass = VK_NULL_HANDLE,
362          .subpass = 0,
363       },
364       &(struct radv_graphics_pipeline_create_info){
365          .use_rectlist = true,
366          .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11 ? V_028808_CB_DCC_DECOMPRESS_GFX11
367                                                                                    : V_028808_CB_DCC_DECOMPRESS_GFX8,
368       },
369       &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
370    if (result != VK_SUCCESS)
371       goto cleanup;
372 
373 cleanup:
374    ralloc_free(fs_module);
375    return result;
376 }
377 
378 void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device * device)379 radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
380 {
381    struct radv_meta_state *state = &device->meta_state;
382 
383    radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
384    radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.fmask_decompress_pipeline,
385                         &state->alloc);
386    radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
387    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout, &state->alloc);
388 
389    radv_DestroyPipeline(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_pipeline,
390                         &state->alloc);
391    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_p_layout,
392                               &state->alloc);
393    device->vk.dispatch_table.DestroyDescriptorSetLayout(
394       radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout, &state->alloc);
395 }
396 
397 static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device * device)398 radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
399 {
400    VkResult res = VK_SUCCESS;
401 
402    mtx_lock(&device->meta_state.mtx);
403    if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
404       mtx_unlock(&device->meta_state.mtx);
405       return VK_SUCCESS;
406    }
407 
408    nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
409    if (!vs_module) {
410       /* XXX: Need more accurate error */
411       res = VK_ERROR_OUT_OF_HOST_MEMORY;
412       goto cleanup;
413    }
414 
415    res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
416    if (res != VK_SUCCESS)
417       goto cleanup;
418 
419    VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
420    res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
421    if (res != VK_SUCCESS)
422       goto cleanup;
423 
424    res = create_dcc_compress_compute(device);
425    if (res != VK_SUCCESS)
426       goto cleanup;
427 
428 cleanup:
429    ralloc_free(vs_module);
430    mtx_unlock(&device->meta_state.mtx);
431 
432    return res;
433 }
434 
435 VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device * device,bool on_demand)436 radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
437 {
438    if (on_demand)
439       return VK_SUCCESS;
440 
441    return radv_device_init_meta_fast_clear_flush_state_internal(device);
442 }
443 
444 static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,uint64_t pred_offset,bool value)445 radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
446                                            uint64_t pred_offset, bool value)
447 {
448    uint64_t va = 0;
449 
450    if (value) {
451       va = radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset;
452       va += pred_offset;
453    }
454 
455    radv_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
456 }
457 
458 static void
radv_process_color_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,int level,int layer,bool flush_cb)459 radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
460                                const VkImageSubresourceRange *range, int level, int layer, bool flush_cb)
461 {
462    struct radv_device *device = cmd_buffer->device;
463    struct radv_image_view iview;
464    uint32_t width, height;
465 
466    width = radv_minify(image->vk.extent.width, range->baseMipLevel + level);
467    height = radv_minify(image->vk.extent.height, range->baseMipLevel + level);
468 
469    radv_image_view_init(&iview, device,
470                         &(VkImageViewCreateInfo){
471                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
472                            .image = radv_image_to_handle(image),
473                            .viewType = radv_meta_get_view_type(image),
474                            .format = image->vk.format,
475                            .subresourceRange =
476                               {
477                                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
478                                  .baseMipLevel = range->baseMipLevel + level,
479                                  .levelCount = 1,
480                                  .baseArrayLayer = range->baseArrayLayer + layer,
481                                  .layerCount = 1,
482                               },
483                         },
484                         0, NULL);
485 
486    const VkRenderingAttachmentInfo color_att = {
487       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
488       .imageView = radv_image_view_to_handle(&iview),
489       .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
490       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
491       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
492    };
493 
494    const VkRenderingInfo rendering_info = {
495       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
496       .renderArea = {.offset = {0, 0}, .extent = {width, height}},
497       .layerCount = 1,
498       .colorAttachmentCount = 1,
499       .pColorAttachments = &color_att,
500    };
501 
502    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
503 
504    if (flush_cb)
505       cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
506 
507    radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
508 
509    if (flush_cb)
510       cmd_buffer->state.flush_bits |= radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
511 
512    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
513 
514    radv_image_view_finish(&iview);
515 }
516 
517 static void
radv_process_color_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,enum radv_color_op op)518 radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
519                          const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
520 {
521    struct radv_device *device = cmd_buffer->device;
522    struct radv_meta_saved_state saved_state;
523    bool old_predicating = false;
524    bool flush_cb = false;
525    uint64_t pred_offset;
526    VkPipeline *pipeline;
527 
528    switch (op) {
529    case FAST_CLEAR_ELIMINATE:
530       pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
531       pred_offset = image->fce_pred_offset;
532       break;
533    case FMASK_DECOMPRESS:
534       pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
535       pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */
536 
537       /* Flushing CB is required before and after FMASK_DECOMPRESS. */
538       flush_cb = true;
539       break;
540    case DCC_DECOMPRESS:
541       pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
542       pred_offset = image->dcc_pred_offset;
543 
544       /* Flushing CB is required before and after DCC_DECOMPRESS. */
545       flush_cb = true;
546       break;
547    default:
548       unreachable("Invalid color op");
549    }
550 
551    if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
552        (image->vk.array_layers != vk_image_subresource_layer_count(&image->vk, subresourceRange) ||
553         subresourceRange->baseArrayLayer != 0)) {
554       /* Only use predication if the image has DCC with mipmaps or
555        * if the range of layers covers the whole image because the
556        * predication is based on mip level.
557        */
558       pred_offset = 0;
559    }
560 
561    if (!*pipeline) {
562       VkResult ret;
563 
564       ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
565       if (ret != VK_SUCCESS) {
566          vk_command_buffer_set_error(&cmd_buffer->vk, ret);
567          return;
568       }
569    }
570 
571    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
572 
573    if (pred_offset) {
574       pred_offset += 8 * subresourceRange->baseMipLevel;
575 
576       old_predicating = cmd_buffer->state.predicating;
577 
578       radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
579       cmd_buffer->state.predicating = true;
580    }
581 
582    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
583 
584    for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) {
585       uint32_t width, height;
586 
587       /* Do not decompress levels without DCC. */
588       if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
589          continue;
590 
591       width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
592       height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
593 
594       radv_CmdSetViewport(
595          radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
596          &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f});
597 
598       radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
599                          &(VkRect2D){
600                             .offset = {0, 0},
601                             .extent = {width, height},
602                          });
603 
604       for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
605          radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
606       }
607    }
608 
609    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
610 
611    if (pred_offset) {
612       pred_offset += 8 * subresourceRange->baseMipLevel;
613 
614       cmd_buffer->state.predicating = old_predicating;
615 
616       radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
617 
618       if (cmd_buffer->state.predication_type != -1) {
619          /* Restore previous conditional rendering user state. */
620          radv_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
621                                          cmd_buffer->state.predication_op, cmd_buffer->state.predication_va);
622       }
623    }
624 
625    radv_meta_restore(&saved_state, cmd_buffer);
626 
627    /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
628     * also perform a fast-clear eliminate.
629     */
630    radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
631 
632    /* Mark the image as being decompressed. */
633    if (op == DCC_DECOMPRESS)
634       radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
635 }
636 
637 static void
radv_fast_clear_eliminate(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)638 radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
639                           const VkImageSubresourceRange *subresourceRange)
640 {
641    struct radv_barrier_data barrier = {0};
642 
643    barrier.layout_transitions.fast_clear_eliminate = 1;
644    radv_describe_layout_transition(cmd_buffer, &barrier);
645 
646    radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
647 }
648 
649 static void
radv_fmask_decompress(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)650 radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
651                       const VkImageSubresourceRange *subresourceRange)
652 {
653    struct radv_barrier_data barrier = {0};
654 
655    barrier.layout_transitions.fmask_decompress = 1;
656    radv_describe_layout_transition(cmd_buffer, &barrier);
657 
658    radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
659 }
660 
661 void
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)662 radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
663                                     const VkImageSubresourceRange *subresourceRange)
664 {
665    if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
666       if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
667          /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
668           * FMASK_DECOMPRESS can't eliminate DCC fast clears.
669           */
670          radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
671       }
672 
673       radv_fmask_decompress(cmd_buffer, image, subresourceRange);
674    } else {
675       /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
676       if (image->support_comp_to_single)
677          return;
678 
679       radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
680    }
681 }
682 
683 static void
radv_decompress_dcc_compute(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)684 radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
685                             const VkImageSubresourceRange *subresourceRange)
686 {
687    struct radv_meta_saved_state saved_state;
688    struct radv_image_view load_iview = {0};
689    struct radv_image_view store_iview = {0};
690    struct radv_device *device = cmd_buffer->device;
691 
692    cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
693 
694    if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
695       VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
696       if (ret != VK_SUCCESS) {
697          vk_command_buffer_set_error(&cmd_buffer->vk, ret);
698          return;
699       }
700    }
701 
702    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
703 
704    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
705                         device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
706 
707    for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); l++) {
708       uint32_t width, height;
709 
710       /* Do not decompress levels without DCC. */
711       if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
712          continue;
713 
714       width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
715       height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
716 
717       for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
718          radv_image_view_init(&load_iview, cmd_buffer->device,
719                               &(VkImageViewCreateInfo){
720                                  .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
721                                  .image = radv_image_to_handle(image),
722                                  .viewType = VK_IMAGE_VIEW_TYPE_2D,
723                                  .format = image->vk.format,
724                                  .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
725                                                       .baseMipLevel = subresourceRange->baseMipLevel + l,
726                                                       .levelCount = 1,
727                                                       .baseArrayLayer = subresourceRange->baseArrayLayer + s,
728                                                       .layerCount = 1},
729                               },
730                               0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
731          radv_image_view_init(&store_iview, cmd_buffer->device,
732                               &(VkImageViewCreateInfo){
733                                  .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
734                                  .image = radv_image_to_handle(image),
735                                  .viewType = VK_IMAGE_VIEW_TYPE_2D,
736                                  .format = image->vk.format,
737                                  .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
738                                                       .baseMipLevel = subresourceRange->baseMipLevel + l,
739                                                       .levelCount = 1,
740                                                       .baseArrayLayer = subresourceRange->baseArrayLayer + s,
741                                                       .layerCount = 1},
742                               },
743                               0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
744 
745          radv_meta_push_descriptor_set(
746             cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
747             device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
748             2,                                                                      /* descriptorWriteCount */
749             (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
750                                       .dstBinding = 0,
751                                       .dstArrayElement = 0,
752                                       .descriptorCount = 1,
753                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
754                                       .pImageInfo =
755                                          (VkDescriptorImageInfo[]){
756                                             {
757                                                .sampler = VK_NULL_HANDLE,
758                                                .imageView = radv_image_view_to_handle(&load_iview),
759                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
760                                             },
761                                          }},
762                                      {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
763                                       .dstBinding = 1,
764                                       .dstArrayElement = 0,
765                                       .descriptorCount = 1,
766                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
767                                       .pImageInfo = (VkDescriptorImageInfo[]){
768                                          {
769                                             .sampler = VK_NULL_HANDLE,
770                                             .imageView = radv_image_view_to_handle(&store_iview),
771                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
772                                          },
773                                       }}});
774 
775          radv_unaligned_dispatch(cmd_buffer, width, height, 1);
776 
777          radv_image_view_finish(&load_iview);
778          radv_image_view_finish(&store_iview);
779       }
780    }
781 
782    /* Mark this image as actually being decompressed. */
783    radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
784 
785    radv_meta_restore(&saved_state, cmd_buffer);
786 
787    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
788                                    radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
789 
790    /* Initialize the DCC metadata as "fully expanded". */
791    cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
792 }
793 
794 void
radv_decompress_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)795 radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
796                     const VkImageSubresourceRange *subresourceRange)
797 {
798    struct radv_barrier_data barrier = {0};
799 
800    barrier.layout_transitions.dcc_decompress = 1;
801    radv_describe_layout_transition(cmd_buffer, &barrier);
802 
803    if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
804       radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
805    else
806       radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
807 }
808