• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30 
31 enum radv_color_op {
32    FAST_CLEAR_ELIMINATE,
33    FMASK_DECOMPRESS,
34    DCC_DECOMPRESS,
35 };
36 
37 static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device * dev)38 build_dcc_decompress_compute_shader(struct radv_device *dev)
39 {
40    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
41 
42    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_decompress_compute");
43 
44    /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
45    b.shader->info.workgroup_size[0] = 16;
46    b.shader->info.workgroup_size[1] = 16;
47    nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
48    input_img->data.descriptor_set = 0;
49    input_img->data.binding = 0;
50 
51    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
52    output_img->data.descriptor_set = 0;
53    output_img->data.binding = 1;
54 
55    nir_ssa_def *global_id = get_global_ids(&b, 2);
56    nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
57                                          nir_channel(&b, global_id, 1),
58                                          nir_ssa_undef(&b, 1, 32),
59                                          nir_ssa_undef(&b, 1, 32));
60 
61    nir_ssa_def *data = nir_image_deref_load(
62       &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
63       nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
64 
65    /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
66     * creating a vmcnt(0) because it expects the L1 cache to keep memory
67     * operations in-order for the same workgroup. The vmcnt(0) seems
68     * necessary however. */
69    nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
70                       .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
71 
72    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
73                          nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
74                          .image_dim = GLSL_SAMPLER_DIM_2D);
75    return b.shader;
76 }
77 
78 static VkResult
create_dcc_compress_compute(struct radv_device * device)79 create_dcc_compress_compute(struct radv_device *device)
80 {
81    VkResult result = VK_SUCCESS;
82    nir_shader *cs = build_dcc_decompress_compute_shader(device);
83 
84    VkDescriptorSetLayoutCreateInfo ds_create_info = {
85       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
86       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
87       .bindingCount = 2,
88       .pBindings = (VkDescriptorSetLayoutBinding[]){
89          {.binding = 0,
90           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
91           .descriptorCount = 1,
92           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
93           .pImmutableSamplers = NULL},
94          {.binding = 1,
95           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
96           .descriptorCount = 1,
97           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98           .pImmutableSamplers = NULL},
99       }};
100 
101    result = radv_CreateDescriptorSetLayout(
102       radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
103       &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
104    if (result != VK_SUCCESS)
105       goto cleanup;
106 
107    VkPipelineLayoutCreateInfo pl_create_info = {
108       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
109       .setLayoutCount = 1,
110       .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
111       .pushConstantRangeCount = 0,
112       .pPushConstantRanges = NULL,
113    };
114 
115    result = radv_CreatePipelineLayout(
116       radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
117       &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
118    if (result != VK_SUCCESS)
119       goto cleanup;
120 
121    /* compute shader */
122 
123    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
124       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
125       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
126       .module = vk_shader_module_handle_from_nir(cs),
127       .pName = "main",
128       .pSpecializationInfo = NULL,
129    };
130 
131    VkComputePipelineCreateInfo vk_pipeline_info = {
132       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
133       .stage = pipeline_shader_stage,
134       .flags = 0,
135       .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
136    };
137 
138    result = radv_CreateComputePipelines(
139       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
140       &vk_pipeline_info, NULL,
141       &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
142    if (result != VK_SUCCESS)
143       goto cleanup;
144 
145 cleanup:
146    ralloc_free(cs);
147    return result;
148 }
149 
150 static VkResult
create_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout)151 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
152 {
153    VkPipelineLayoutCreateInfo pl_create_info = {
154       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
155       .setLayoutCount = 0,
156       .pSetLayouts = NULL,
157       .pushConstantRangeCount = 0,
158       .pPushConstantRanges = NULL,
159    };
160 
161    return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
162                                     &device->meta_state.alloc, layout);
163 }
164 
165 static VkResult
create_pipeline(struct radv_device * device,VkShaderModule vs_module_h,VkPipelineLayout layout)166 create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
167 {
168    VkResult result;
169    VkDevice device_h = radv_device_to_handle(device);
170 
171    nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
172 
173    if (!fs_module) {
174       /* XXX: Need more accurate error */
175       result = VK_ERROR_OUT_OF_HOST_MEMORY;
176       goto cleanup;
177    }
178 
179    const VkPipelineShaderStageCreateInfo stages[2] = {
180       {
181          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
182          .stage = VK_SHADER_STAGE_VERTEX_BIT,
183          .module = vs_module_h,
184          .pName = "main",
185       },
186       {
187          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
188          .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
189          .module = vk_shader_module_handle_from_nir(fs_module),
190          .pName = "main",
191       },
192    };
193 
194    const VkPipelineVertexInputStateCreateInfo vi_state = {
195       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
196       .vertexBindingDescriptionCount = 0,
197       .vertexAttributeDescriptionCount = 0,
198    };
199 
200    const VkPipelineInputAssemblyStateCreateInfo ia_state = {
201       .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
202       .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
203       .primitiveRestartEnable = false,
204    };
205 
206    const VkPipelineColorBlendStateCreateInfo blend_state = {
207       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
208       .logicOpEnable = false,
209       .attachmentCount = 1,
210       .pAttachments = (VkPipelineColorBlendAttachmentState[]){
211          {
212             .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
213                               VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
214          },
215       }};
216    const VkPipelineRasterizationStateCreateInfo rs_state = {
217       .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
218       .depthClampEnable = false,
219       .rasterizerDiscardEnable = false,
220       .polygonMode = VK_POLYGON_MODE_FILL,
221       .cullMode = VK_CULL_MODE_NONE,
222       .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
223    };
224 
225    const VkFormat color_format = VK_FORMAT_R8_UNORM;
226    const VkPipelineRenderingCreateInfo rendering_create_info = {
227       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
228       .colorAttachmentCount = 1,
229       .pColorAttachmentFormats = &color_format,
230    };
231 
232    result = radv_graphics_pipeline_create(
233       device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
234       &(VkGraphicsPipelineCreateInfo){
235          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
236          .pNext = &rendering_create_info,
237          .stageCount = 2,
238          .pStages = stages,
239 
240          .pVertexInputState = &vi_state,
241          .pInputAssemblyState = &ia_state,
242 
243          .pViewportState =
244             &(VkPipelineViewportStateCreateInfo){
245                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
246                .viewportCount = 1,
247                .scissorCount = 1,
248             },
249          .pRasterizationState = &rs_state,
250          .pMultisampleState =
251             &(VkPipelineMultisampleStateCreateInfo){
252                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
253                .rasterizationSamples = 1,
254                .sampleShadingEnable = false,
255                .pSampleMask = NULL,
256                .alphaToCoverageEnable = false,
257                .alphaToOneEnable = false,
258             },
259          .pColorBlendState = &blend_state,
260          .pDynamicState =
261             &(VkPipelineDynamicStateCreateInfo){
262                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
263                .dynamicStateCount = 2,
264                .pDynamicStates =
265                   (VkDynamicState[]){
266                      VK_DYNAMIC_STATE_VIEWPORT,
267                      VK_DYNAMIC_STATE_SCISSOR,
268                   },
269             },
270          .layout = layout,
271          .renderPass = VK_NULL_HANDLE,
272          .subpass = 0,
273       },
274       &(struct radv_graphics_pipeline_create_info){
275          .use_rectlist = true,
276          .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
277       },
278       &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
279    if (result != VK_SUCCESS)
280       goto cleanup;
281 
282    result = radv_graphics_pipeline_create(
283       device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
284       &(VkGraphicsPipelineCreateInfo){
285          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
286          .pNext = &rendering_create_info,
287          .stageCount = 2,
288          .pStages = stages,
289 
290          .pVertexInputState = &vi_state,
291          .pInputAssemblyState = &ia_state,
292 
293          .pViewportState =
294             &(VkPipelineViewportStateCreateInfo){
295                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
296                .viewportCount = 1,
297                .scissorCount = 1,
298             },
299          .pRasterizationState = &rs_state,
300          .pMultisampleState =
301             &(VkPipelineMultisampleStateCreateInfo){
302                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
303                .rasterizationSamples = 1,
304                .sampleShadingEnable = false,
305                .pSampleMask = NULL,
306                .alphaToCoverageEnable = false,
307                .alphaToOneEnable = false,
308             },
309          .pColorBlendState = &blend_state,
310          .pDynamicState =
311             &(VkPipelineDynamicStateCreateInfo){
312                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
313                .dynamicStateCount = 2,
314                .pDynamicStates =
315                   (VkDynamicState[]){
316                      VK_DYNAMIC_STATE_VIEWPORT,
317                      VK_DYNAMIC_STATE_SCISSOR,
318                   },
319             },
320          .layout = layout,
321          .renderPass = VK_NULL_HANDLE,
322          .subpass = 0,
323       },
324       &(struct radv_graphics_pipeline_create_info){
325          .use_rectlist = true,
326          .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
327       },
328       &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
329    if (result != VK_SUCCESS)
330       goto cleanup;
331 
332    result = radv_graphics_pipeline_create(
333       device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
334       &(VkGraphicsPipelineCreateInfo){
335          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
336          .pNext = &rendering_create_info,
337          .stageCount = 2,
338          .pStages = stages,
339 
340          .pVertexInputState = &vi_state,
341          .pInputAssemblyState = &ia_state,
342 
343          .pViewportState =
344             &(VkPipelineViewportStateCreateInfo){
345                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
346                .viewportCount = 1,
347                .scissorCount = 1,
348             },
349          .pRasterizationState = &rs_state,
350          .pMultisampleState =
351             &(VkPipelineMultisampleStateCreateInfo){
352                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
353                .rasterizationSamples = 1,
354                .sampleShadingEnable = false,
355                .pSampleMask = NULL,
356                .alphaToCoverageEnable = false,
357                .alphaToOneEnable = false,
358             },
359          .pColorBlendState = &blend_state,
360          .pDynamicState =
361             &(VkPipelineDynamicStateCreateInfo){
362                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
363                .dynamicStateCount = 2,
364                .pDynamicStates =
365                   (VkDynamicState[]){
366                      VK_DYNAMIC_STATE_VIEWPORT,
367                      VK_DYNAMIC_STATE_SCISSOR,
368                   },
369             },
370          .layout = layout,
371          .renderPass = VK_NULL_HANDLE,
372          .subpass = 0,
373       },
374       &(struct radv_graphics_pipeline_create_info){
375          .use_rectlist = true,
376          .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11
377                                  ? V_028808_CB_DCC_DECOMPRESS_GFX11
378                                  : V_028808_CB_DCC_DECOMPRESS_GFX8,
379       },
380       &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
381    if (result != VK_SUCCESS)
382       goto cleanup;
383 
384 cleanup:
385    ralloc_free(fs_module);
386    return result;
387 }
388 
389 void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device * device)390 radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
391 {
392    struct radv_meta_state *state = &device->meta_state;
393 
394    radv_DestroyPipeline(radv_device_to_handle(device),
395                         state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
396    radv_DestroyPipeline(radv_device_to_handle(device),
397                         state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
398    radv_DestroyPipeline(radv_device_to_handle(device),
399                         state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
400    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
401                               &state->alloc);
402 
403    radv_DestroyPipeline(radv_device_to_handle(device),
404                         state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
405    radv_DestroyPipelineLayout(radv_device_to_handle(device),
406                               state->fast_clear_flush.dcc_decompress_compute_p_layout,
407                               &state->alloc);
408    device->vk.dispatch_table.DestroyDescriptorSetLayout(
409       radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout,
410       &state->alloc);
411 }
412 
413 static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device * device)414 radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
415 {
416    VkResult res = VK_SUCCESS;
417 
418    mtx_lock(&device->meta_state.mtx);
419    if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
420       mtx_unlock(&device->meta_state.mtx);
421       return VK_SUCCESS;
422    }
423 
424    nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
425    if (!vs_module) {
426       /* XXX: Need more accurate error */
427       res = VK_ERROR_OUT_OF_HOST_MEMORY;
428       goto cleanup;
429    }
430 
431    res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
432    if (res != VK_SUCCESS)
433       goto cleanup;
434 
435    VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
436    res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
437    if (res != VK_SUCCESS)
438       goto cleanup;
439 
440    res = create_dcc_compress_compute(device);
441    if (res != VK_SUCCESS)
442       goto cleanup;
443 
444 cleanup:
445    ralloc_free(vs_module);
446    mtx_unlock(&device->meta_state.mtx);
447 
448    return res;
449 }
450 
451 VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device * device,bool on_demand)452 radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
453 {
454    if (on_demand)
455       return VK_SUCCESS;
456 
457    return radv_device_init_meta_fast_clear_flush_state_internal(device);
458 }
459 
460 static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,uint64_t pred_offset,bool value)461 radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
462                                            struct radv_image *image, uint64_t pred_offset,
463                                            bool value)
464 {
465    uint64_t va = 0;
466 
467    if (value) {
468       va = radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset;
469       va += pred_offset;
470    }
471 
472    si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
473 }
474 
475 static void
radv_process_color_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,int level,int layer,bool flush_cb)476 radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
477                                const VkImageSubresourceRange *range, int level, int layer,
478                                bool flush_cb)
479 {
480    struct radv_device *device = cmd_buffer->device;
481    struct radv_image_view iview;
482    uint32_t width, height;
483 
484    width = radv_minify(image->info.width, range->baseMipLevel + level);
485    height = radv_minify(image->info.height, range->baseMipLevel + level);
486 
487    radv_image_view_init(&iview, device,
488                         &(VkImageViewCreateInfo){
489                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
490                            .image = radv_image_to_handle(image),
491                            .viewType = radv_meta_get_view_type(image),
492                            .format = image->vk.format,
493                            .subresourceRange =
494                               {
495                                  .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
496                                  .baseMipLevel = range->baseMipLevel + level,
497                                  .levelCount = 1,
498                                  .baseArrayLayer = range->baseArrayLayer + layer,
499                                  .layerCount = 1,
500                               },
501                         },
502                         0, NULL);
503 
504    const VkRenderingAttachmentInfo color_att = {
505       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
506       .imageView = radv_image_view_to_handle(&iview),
507       .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
508       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
509       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
510    };
511 
512    const VkRenderingInfo rendering_info = {
513       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
514       .renderArea = {
515          .offset = { 0, 0 },
516          .extent = { width, height }
517       },
518       .layerCount = 1,
519       .colorAttachmentCount = 1,
520       .pColorAttachments = &color_att,
521    };
522 
523    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
524 
525    if (flush_cb)
526       cmd_buffer->state.flush_bits |=
527          radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
528 
529    radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
530 
531    if (flush_cb)
532       cmd_buffer->state.flush_bits |=
533          radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
534 
535    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
536 
537    radv_image_view_finish(&iview);
538 }
539 
540 static void
radv_process_color_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,enum radv_color_op op)541 radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
542                          const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
543 {
544    struct radv_device *device = cmd_buffer->device;
545    struct radv_meta_saved_state saved_state;
546    bool old_predicating = false;
547    bool flush_cb = false;
548    uint64_t pred_offset;
549    VkPipeline *pipeline;
550 
551    switch (op) {
552    case FAST_CLEAR_ELIMINATE:
553       pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
554       pred_offset = image->fce_pred_offset;
555       break;
556    case FMASK_DECOMPRESS:
557       pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
558       pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */
559 
560       /* Flushing CB is required before and after FMASK_DECOMPRESS. */
561       flush_cb = true;
562       break;
563    case DCC_DECOMPRESS:
564       pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
565       pred_offset = image->dcc_pred_offset;
566 
567       /* Flushing CB is required before and after DCC_DECOMPRESS. */
568       flush_cb = true;
569       break;
570    default:
571       unreachable("Invalid color op");
572    }
573 
574    if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
575        (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
576         subresourceRange->baseArrayLayer != 0)) {
577       /* Only use predication if the image has DCC with mipmaps or
578        * if the range of layers covers the whole image because the
579        * predication is based on mip level.
580        */
581       pred_offset = 0;
582    }
583 
584    if (!*pipeline) {
585       VkResult ret;
586 
587       ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
588       if (ret != VK_SUCCESS) {
589          cmd_buffer->record_result = ret;
590          return;
591       }
592    }
593 
594    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
595 
596    if (pred_offset) {
597       pred_offset += 8 * subresourceRange->baseMipLevel;
598 
599       old_predicating = cmd_buffer->state.predicating;
600 
601       radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
602       cmd_buffer->state.predicating = true;
603    }
604 
605    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
606                         *pipeline);
607 
608    for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
609       uint32_t width, height;
610 
611       /* Do not decompress levels without DCC. */
612       if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
613          continue;
614 
615       width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
616       height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
617 
618       radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
619                           &(VkViewport){.x = 0,
620                                         .y = 0,
621                                         .width = width,
622                                         .height = height,
623                                         .minDepth = 0.0f,
624                                         .maxDepth = 1.0f});
625 
626       radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
627                          &(VkRect2D){
628                             .offset = {0, 0},
629                             .extent = {width, height},
630                          });
631 
632       for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
633          radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
634       }
635    }
636 
637    cmd_buffer->state.flush_bits |=
638       RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
639 
640    if (pred_offset) {
641       pred_offset += 8 * subresourceRange->baseMipLevel;
642 
643       cmd_buffer->state.predicating = old_predicating;
644 
645       radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
646 
647       if (cmd_buffer->state.predication_type != -1) {
648          /* Restore previous conditional rendering user state. */
649          si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
650                                        cmd_buffer->state.predication_op,
651                                        cmd_buffer->state.predication_va);
652       }
653    }
654 
655    radv_meta_restore(&saved_state, cmd_buffer);
656 
657    /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
658     * also perform a fast-clear eliminate.
659     */
660    radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
661 
662    /* Mark the image as being decompressed. */
663    if (op == DCC_DECOMPRESS)
664       radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
665 }
666 
667 static void
radv_fast_clear_eliminate(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)668 radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
669                           const VkImageSubresourceRange *subresourceRange)
670 {
671    struct radv_barrier_data barrier = {0};
672 
673    barrier.layout_transitions.fast_clear_eliminate = 1;
674    radv_describe_layout_transition(cmd_buffer, &barrier);
675 
676    radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
677 }
678 
679 static void
radv_fmask_decompress(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)680 radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
681                       const VkImageSubresourceRange *subresourceRange)
682 {
683    struct radv_barrier_data barrier = {0};
684 
685    barrier.layout_transitions.fmask_decompress = 1;
686    radv_describe_layout_transition(cmd_buffer, &barrier);
687 
688    radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
689 }
690 
691 void
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)692 radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
693                                     const VkImageSubresourceRange *subresourceRange)
694 {
695    if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
696       if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
697          /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
698           * FMASK_DECOMPRESS can't eliminate DCC fast clears.
699           */
700          radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
701       }
702 
703       radv_fmask_decompress(cmd_buffer, image, subresourceRange);
704    } else {
705       /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
706       if (image->support_comp_to_single)
707          return;
708 
709       radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
710    }
711 }
712 
713 static void
radv_decompress_dcc_compute(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)714 radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
715                             const VkImageSubresourceRange *subresourceRange)
716 {
717    struct radv_meta_saved_state saved_state;
718    struct radv_image_view load_iview = {0};
719    struct radv_image_view store_iview = {0};
720    struct radv_device *device = cmd_buffer->device;
721 
722    cmd_buffer->state.flush_bits |=
723       radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
724 
725    if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
726       VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
727       if (ret != VK_SUCCESS) {
728          cmd_buffer->record_result = ret;
729          return;
730       }
731    }
732 
733    radv_meta_save(&saved_state, cmd_buffer,
734                   RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
735 
736    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
737                         device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
738 
739    for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
740       uint32_t width, height;
741 
742       /* Do not decompress levels without DCC. */
743       if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
744          continue;
745 
746       width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
747       height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
748 
749       for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
750          radv_image_view_init(
751             &load_iview, cmd_buffer->device,
752             &(VkImageViewCreateInfo){
753                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
754                .image = radv_image_to_handle(image),
755                .viewType = VK_IMAGE_VIEW_TYPE_2D,
756                .format = image->vk.format,
757                .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
758                                     .baseMipLevel = subresourceRange->baseMipLevel + l,
759                                     .levelCount = 1,
760                                     .baseArrayLayer = subresourceRange->baseArrayLayer + s,
761                                     .layerCount = 1},
762             },
763             0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
764          radv_image_view_init(
765             &store_iview, cmd_buffer->device,
766             &(VkImageViewCreateInfo){
767                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
768                .image = radv_image_to_handle(image),
769                .viewType = VK_IMAGE_VIEW_TYPE_2D,
770                .format = image->vk.format,
771                .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
772                                     .baseMipLevel = subresourceRange->baseMipLevel + l,
773                                     .levelCount = 1,
774                                     .baseArrayLayer = subresourceRange->baseArrayLayer + s,
775                                     .layerCount = 1},
776             },
777             0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
778 
779          radv_meta_push_descriptor_set(
780             cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
781             device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
782             2, /* descriptorWriteCount */
783             (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
784                                       .dstBinding = 0,
785                                       .dstArrayElement = 0,
786                                       .descriptorCount = 1,
787                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
788                                       .pImageInfo =
789                                          (VkDescriptorImageInfo[]){
790                                             {
791                                                .sampler = VK_NULL_HANDLE,
792                                                .imageView = radv_image_view_to_handle(&load_iview),
793                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
794                                             },
795                                          }},
796                                      {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
797                                       .dstBinding = 1,
798                                       .dstArrayElement = 0,
799                                       .descriptorCount = 1,
800                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
801                                       .pImageInfo = (VkDescriptorImageInfo[]){
802                                          {
803                                             .sampler = VK_NULL_HANDLE,
804                                             .imageView = radv_image_view_to_handle(&store_iview),
805                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
806                                          },
807                                       }}});
808 
809          radv_unaligned_dispatch(cmd_buffer, width, height, 1);
810 
811          radv_image_view_finish(&load_iview);
812          radv_image_view_finish(&store_iview);
813       }
814    }
815 
816    /* Mark this image as actually being decompressed. */
817    radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
818 
819    radv_meta_restore(&saved_state, cmd_buffer);
820 
821    cmd_buffer->state.flush_bits |=
822       RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
823       radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
824 
825    /* Initialize the DCC metadata as "fully expanded". */
826    cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
827 }
828 
829 void
radv_decompress_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)830 radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
831                     const VkImageSubresourceRange *subresourceRange)
832 {
833    struct radv_barrier_data barrier = {0};
834 
835    barrier.layout_transitions.dcc_decompress = 1;
836    radv_describe_layout_transition(cmd_buffer, &barrier);
837 
838    if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
839       radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
840    else
841       radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
842 }
843