• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2015 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "nir/nir_builder.h"
25 #include "radv_debug.h"
26 #include "radv_meta.h"
27 #include "radv_private.h"
28 
29 #include "util/format_rgb9e5.h"
30 #include "vk_format.h"
31 
32 enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
33 
34 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)35 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
36                     uint32_t frag_output)
37 {
38    nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
39    nir_builder fs_b =
40       radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
41 
42    const struct glsl_type *position_type = glsl_vec4_type();
43    const struct glsl_type *color_type = glsl_vec4_type();
44 
45    nir_variable *vs_out_pos =
46       nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
47    vs_out_pos->data.location = VARYING_SLOT_POS;
48 
49    nir_ssa_def *in_color_load =
50       nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
51 
52    nir_variable *fs_out_color =
53       nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
54    fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
55 
56    nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
57 
58    nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
59    nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
60 
61    const struct glsl_type *layer_type = glsl_int_type();
62    nir_variable *vs_out_layer =
63       nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
64    vs_out_layer->data.location = VARYING_SLOT_LAYER;
65    vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
66    nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
67    nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
68 
69    nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
70    nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
71 
72    *out_vs = vs_b.shader;
73    *out_fs = fs_b.shader;
74 }
75 
76 static VkResult
create_pipeline(struct radv_device * device,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineRenderingCreateInfo * dyn_state,const VkPipelineLayout layout,const struct radv_graphics_pipeline_create_info * extra,const VkAllocationCallbacks * alloc,VkPipeline * pipeline)77 create_pipeline(struct radv_device *device, uint32_t samples,
78                 struct nir_shader *vs_nir, struct nir_shader *fs_nir,
79                 const VkPipelineVertexInputStateCreateInfo *vi_state,
80                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
81                 const VkPipelineColorBlendStateCreateInfo *cb_state,
82                 const VkPipelineRenderingCreateInfo *dyn_state,
83                 const VkPipelineLayout layout,
84                 const struct radv_graphics_pipeline_create_info *extra,
85                 const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
86 {
87    VkDevice device_h = radv_device_to_handle(device);
88    VkResult result;
89 
90    result = radv_graphics_pipeline_create(
91       device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
92       &(VkGraphicsPipelineCreateInfo){
93          .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
94          .pNext = dyn_state,
95          .stageCount = fs_nir ? 2 : 1,
96          .pStages =
97             (VkPipelineShaderStageCreateInfo[]){
98                {
99                   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
100                   .stage = VK_SHADER_STAGE_VERTEX_BIT,
101                   .module = vk_shader_module_handle_from_nir(vs_nir),
102                   .pName = "main",
103                },
104                {
105                   .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
106                   .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
107                   .module = vk_shader_module_handle_from_nir(fs_nir),
108                   .pName = "main",
109                },
110             },
111          .pVertexInputState = vi_state,
112          .pInputAssemblyState =
113             &(VkPipelineInputAssemblyStateCreateInfo){
114                .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
115                .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
116                .primitiveRestartEnable = false,
117             },
118          .pViewportState =
119             &(VkPipelineViewportStateCreateInfo){
120                .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
121                .viewportCount = 1,
122                .scissorCount = 1,
123             },
124          .pRasterizationState =
125             &(VkPipelineRasterizationStateCreateInfo){
126                .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
127                .rasterizerDiscardEnable = false,
128                .polygonMode = VK_POLYGON_MODE_FILL,
129                .cullMode = VK_CULL_MODE_NONE,
130                .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
131                .depthBiasEnable = false,
132                .depthBiasConstantFactor = 0.0f,
133                .depthBiasClamp = 0.0f,
134                .depthBiasSlopeFactor = 0.0f,
135                .lineWidth = 1.0f,
136             },
137          .pMultisampleState =
138             &(VkPipelineMultisampleStateCreateInfo){
139                .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
140                .rasterizationSamples = samples,
141                .sampleShadingEnable = false,
142                .pSampleMask = NULL,
143                .alphaToCoverageEnable = false,
144                .alphaToOneEnable = false,
145             },
146          .pDepthStencilState = ds_state,
147          .pColorBlendState = cb_state,
148          .pDynamicState =
149             &(VkPipelineDynamicStateCreateInfo){
150                .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
151                .dynamicStateCount = 3,
152                .pDynamicStates =
153                   (VkDynamicState[]){
154                      VK_DYNAMIC_STATE_VIEWPORT,
155                      VK_DYNAMIC_STATE_SCISSOR,
156                      VK_DYNAMIC_STATE_STENCIL_REFERENCE,
157                   },
158             },
159          .layout = layout,
160          .flags = 0,
161          .renderPass = VK_NULL_HANDLE,
162          .subpass = 0,
163       },
164       extra, alloc, pipeline);
165 
166    ralloc_free(vs_nir);
167    ralloc_free(fs_nir);
168 
169    return result;
170 }
171 
172 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline)173 create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,
174                       VkFormat format, VkPipeline *pipeline)
175 {
176    struct nir_shader *vs_nir;
177    struct nir_shader *fs_nir;
178    VkResult result;
179 
180    mtx_lock(&device->meta_state.mtx);
181    if (*pipeline) {
182       mtx_unlock(&device->meta_state.mtx);
183       return VK_SUCCESS;
184    }
185 
186    build_color_shaders(device, &vs_nir, &fs_nir, frag_output);
187 
188    const VkPipelineVertexInputStateCreateInfo vi_state = {
189       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
190       .vertexBindingDescriptionCount = 0,
191       .vertexAttributeDescriptionCount = 0,
192    };
193 
194    const VkPipelineDepthStencilStateCreateInfo ds_state = {
195       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
196       .depthTestEnable = false,
197       .depthWriteEnable = false,
198       .depthBoundsTestEnable = false,
199       .stencilTestEnable = false,
200       .minDepthBounds = 0.0f,
201       .maxDepthBounds = 1.0f,
202    };
203 
204    VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
205    blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
206       .blendEnable = false,
207       .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
208                         VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
209    };
210 
211    const VkPipelineColorBlendStateCreateInfo cb_state = {
212       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
213       .logicOpEnable = false,
214       .attachmentCount = MAX_RTS,
215       .pAttachments = blend_attachment_state,
216       .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }};
217 
218    VkFormat att_formats[MAX_RTS] = { 0 };
219    att_formats[frag_output] = format;
220 
221    const VkPipelineRenderingCreateInfo rendering_create_info = {
222       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
223       .colorAttachmentCount = MAX_RTS,
224       .pColorAttachmentFormats = att_formats,
225    };
226 
227    struct radv_graphics_pipeline_create_info extra = {
228       .use_rectlist = true,
229    };
230    result =
231       create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
232                       &rendering_create_info, device->meta_state.clear_color_p_layout,
233                       &extra, &device->meta_state.alloc, pipeline);
234 
235    mtx_unlock(&device->meta_state.mtx);
236    return result;
237 }
238 
239 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)240 finish_meta_clear_htile_mask_state(struct radv_device *device)
241 {
242    struct radv_meta_state *state = &device->meta_state;
243 
244    radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,
245                         &state->alloc);
246    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,
247                               &state->alloc);
248    device->vk.dispatch_table.DestroyDescriptorSetLayout(
249       radv_device_to_handle(device), state->clear_htile_mask_ds_layout, &state->alloc);
250 }
251 
252 static void
finish_meta_clear_dcc_comp_to_single_state(struct radv_device * device)253 finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
254 {
255    struct radv_meta_state *state = &device->meta_state;
256 
257    for (uint32_t i = 0; i < 2; i++) {
258       radv_DestroyPipeline(radv_device_to_handle(device),
259                            state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
260    }
261    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout,
262                               &state->alloc);
263    device->vk.dispatch_table.DestroyDescriptorSetLayout(
264       radv_device_to_handle(device), state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
265 }
266 
267 void
radv_device_finish_meta_clear_state(struct radv_device * device)268 radv_device_finish_meta_clear_state(struct radv_device *device)
269 {
270    struct radv_meta_state *state = &device->meta_state;
271 
272    for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
273       for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
274          for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
275             radv_DestroyPipeline(radv_device_to_handle(device),
276                                  state->color_clear[i][j].color_pipelines[k], &state->alloc);
277          }
278       }
279    }
280    for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
281       for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
282          radv_DestroyPipeline(radv_device_to_handle(device),
283                               state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
284          radv_DestroyPipeline(radv_device_to_handle(device),
285                               state->ds_clear[i].stencil_only_pipeline[j], &state->alloc);
286          radv_DestroyPipeline(radv_device_to_handle(device),
287                               state->ds_clear[i].depthstencil_pipeline[j], &state->alloc);
288 
289          radv_DestroyPipeline(radv_device_to_handle(device),
290                               state->ds_clear[i].depth_only_unrestricted_pipeline[j],
291                               &state->alloc);
292          radv_DestroyPipeline(radv_device_to_handle(device),
293                               state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
294                               &state->alloc);
295          radv_DestroyPipeline(radv_device_to_handle(device),
296                               state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
297                               &state->alloc);
298       }
299    }
300    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,
301                               &state->alloc);
302    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,
303                               &state->alloc);
304    radv_DestroyPipelineLayout(radv_device_to_handle(device),
305                               state->clear_depth_unrestricted_p_layout, &state->alloc);
306 
307    finish_meta_clear_htile_mask_state(device);
308    finish_meta_clear_dcc_comp_to_single_state(device);
309 }
310 
311 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)312 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
313                  const VkClearRect *clear_rect, uint32_t view_mask)
314 {
315    struct radv_device *device = cmd_buffer->device;
316    const struct radv_subpass *subpass = cmd_buffer->state.subpass;
317    const uint32_t subpass_att = clear_att->colorAttachment;
318    const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
319    const struct radv_image_view *iview =
320       cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
321    uint32_t samples, samples_log2;
322    VkFormat format;
323    unsigned fs_key;
324    VkClearColorValue clear_value = clear_att->clearValue.color;
325    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
326    VkPipeline pipeline;
327 
328    /* When a framebuffer is bound to the current command buffer, get the
329     * number of samples from it. Otherwise, get the number of samples from
330     * the render pass because it's likely a secondary command buffer.
331     */
332    if (iview) {
333       samples = iview->image->info.samples;
334       format = iview->vk.format;
335    } else {
336       samples = cmd_buffer->state.pass->attachments[pass_att].samples;
337       format = cmd_buffer->state.pass->attachments[pass_att].format;
338    }
339 
340    samples_log2 = ffs(samples) - 1;
341    fs_key = radv_format_meta_fs_key(device, format);
342    assert(fs_key != -1);
343 
344    if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
345           .color_pipelines[fs_key] == VK_NULL_HANDLE) {
346       VkResult ret = create_color_pipeline(
347          device, samples, clear_att->colorAttachment, radv_fs_key_format_exemplars[fs_key],
348          &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
349              .color_pipelines[fs_key]);
350       if (ret != VK_SUCCESS) {
351          cmd_buffer->record_result = ret;
352          return;
353       }
354    }
355 
356    pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
357                  .color_pipelines[fs_key];
358 
359    assert(samples_log2 < ARRAY_SIZE(device->meta_state.color_clear));
360    assert(pipeline);
361    assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
362    assert(clear_att->colorAttachment < subpass->color_count);
363 
364    radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
365                          device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
366                          16, &clear_value);
367 
368    radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
369 
370    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
371                        &(VkViewport){.x = clear_rect->rect.offset.x,
372                                      .y = clear_rect->rect.offset.y,
373                                      .width = clear_rect->rect.extent.width,
374                                      .height = clear_rect->rect.extent.height,
375                                      .minDepth = 0.0f,
376                                      .maxDepth = 1.0f});
377 
378    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
379 
380    if (view_mask) {
381       u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
382    } else {
383       radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
384    }
385 }
386 
387 static void
build_depthstencil_shader(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)388 build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs,
389                           struct nir_shader **out_fs, bool unrestricted)
390 {
391    nir_builder vs_b = radv_meta_init_shader(
392       dev, MESA_SHADER_VERTEX,
393       unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
394    nir_builder fs_b = radv_meta_init_shader(
395       dev, MESA_SHADER_FRAGMENT,
396       unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
397 
398    const struct glsl_type *position_out_type = glsl_vec4_type();
399 
400    nir_variable *vs_out_pos =
401       nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
402    vs_out_pos->data.location = VARYING_SLOT_POS;
403 
404    nir_ssa_def *z;
405    if (unrestricted) {
406       nir_ssa_def *in_color_load =
407          nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
408 
409       nir_variable *fs_out_depth =
410          nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
411       fs_out_depth->data.location = FRAG_RESULT_DEPTH;
412       nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
413 
414       z = nir_imm_float(&vs_b, 0.0);
415    } else {
416       z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
417    }
418 
419    nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
420    nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
421 
422    const struct glsl_type *layer_type = glsl_int_type();
423    nir_variable *vs_out_layer =
424       nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
425    vs_out_layer->data.location = VARYING_SLOT_LAYER;
426    vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
427    nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
428    nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
429 
430    nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
431    nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
432 
433    *out_vs = vs_b.shader;
434    *out_fs = fs_b.shader;
435 }
436 
437 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline)438 create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,
439                              uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline)
440 {
441    struct nir_shader *vs_nir, *fs_nir;
442    VkResult result;
443 
444    mtx_lock(&device->meta_state.mtx);
445    if (*pipeline) {
446       mtx_unlock(&device->meta_state.mtx);
447       return VK_SUCCESS;
448    }
449 
450    build_depthstencil_shader(device, &vs_nir, &fs_nir, unrestricted);
451 
452    const VkPipelineVertexInputStateCreateInfo vi_state = {
453       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
454       .vertexBindingDescriptionCount = 0,
455       .vertexAttributeDescriptionCount = 0,
456    };
457 
458    const VkPipelineDepthStencilStateCreateInfo ds_state = {
459       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
460       .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
461       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
462       .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
463       .depthBoundsTestEnable = false,
464       .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
465       .front =
466          {
467             .passOp = VK_STENCIL_OP_REPLACE,
468             .compareOp = VK_COMPARE_OP_ALWAYS,
469             .writeMask = UINT32_MAX,
470             .reference = 0, /* dynamic */
471          },
472       .back = {0 /* dont care */},
473       .minDepthBounds = 0.0f,
474       .maxDepthBounds = 1.0f,
475    };
476 
477    const VkPipelineColorBlendStateCreateInfo cb_state = {
478       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
479       .logicOpEnable = false,
480       .attachmentCount = 0,
481       .pAttachments = NULL,
482       .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
483    };
484 
485    const VkPipelineRenderingCreateInfo rendering_create_info = {
486       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
487       .depthAttachmentFormat =
488          (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
489       .stencilAttachmentFormat =
490          (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
491    };
492 
493    struct radv_graphics_pipeline_create_info extra = {
494       .use_rectlist = true,
495    };
496 
497    if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
498       extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
499    }
500    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
501       extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
502    }
503    result =
504       create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
505                       &rendering_create_info, device->meta_state.clear_depth_p_layout, &extra,
506                       &device->meta_state.alloc, pipeline);
507 
508    mtx_unlock(&device->meta_state.mtx);
509    return result;
510 }
511 
512 static bool
depth_view_can_fast_clear(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageAspectFlags aspects,VkImageLayout layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearDepthStencilValue clear_value)513 depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
514                           VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
515                           const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
516 {
517    if (!iview)
518       return false;
519 
520    uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
521                                                       cmd_buffer->qf);
522    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
523        clear_rect->rect.extent.width != iview->extent.width ||
524        clear_rect->rect.extent.height != iview->extent.height)
525       return false;
526    if (radv_image_is_tc_compat_htile(iview->image) &&
527        (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
528          clear_value.depth != 1.0) ||
529         ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
530       return false;
531    if (radv_htile_enabled(iview->image, iview->vk.base_mip_level) && iview->vk.base_mip_level == 0 &&
532        iview->vk.base_array_layer == 0 && iview->vk.layer_count == iview->image->info.array_size &&
533        radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
534                                        queue_mask) &&
535        radv_image_extent_compare(iview->image, &iview->extent))
536       return true;
537    return false;
538 }
539 
540 static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_state * meta_state,const struct radv_image_view * iview,int samples_log2,VkImageAspectFlags aspects,VkImageLayout layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearDepthStencilValue clear_value)541 pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,
542                            const struct radv_image_view *iview, int samples_log2,
543                            VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
544                            const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
545 {
546    bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop,
547                                          clear_rect, clear_value);
548    bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
549    int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
550    VkPipeline *pipeline;
551 
552    switch (aspects) {
553    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
554       pipeline = unrestricted
555                     ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
556                     : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
557       break;
558    case VK_IMAGE_ASPECT_DEPTH_BIT:
559       pipeline = unrestricted
560                     ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
561                     : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
562       break;
563    case VK_IMAGE_ASPECT_STENCIL_BIT:
564       pipeline = unrestricted
565                     ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
566                     : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
567       break;
568    default:
569       unreachable("expected depth or stencil aspect");
570    }
571 
572    if (*pipeline == VK_NULL_HANDLE) {
573       VkResult ret = create_depthstencil_pipeline(
574          cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
575       if (ret != VK_SUCCESS) {
576          cmd_buffer->record_result = ret;
577          return VK_NULL_HANDLE;
578       }
579    }
580    return *pipeline;
581 }
582 
583 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,struct radv_subpass_attachment * ds_att,uint32_t view_mask,bool ds_resolve_clear)584 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
585                         const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att,
586                         uint32_t view_mask, bool ds_resolve_clear)
587 {
588    struct radv_device *device = cmd_buffer->device;
589    struct radv_meta_state *meta_state = &device->meta_state;
590    const struct radv_subpass *subpass = cmd_buffer->state.subpass;
591    const uint32_t pass_att = ds_att->attachment;
592    VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
593    VkImageAspectFlags aspects = clear_att->aspectMask;
594    const struct radv_image_view *iview =
595       cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
596    uint32_t samples, samples_log2;
597    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
598 
599    /* When a framebuffer is bound to the current command buffer, get the
600     * number of samples from it. Otherwise, get the number of samples from
601     * the render pass because it's likely a secondary command buffer.
602     */
603    if (iview) {
604       samples = iview->image->info.samples;
605    } else {
606       samples = cmd_buffer->state.pass->attachments[pass_att].samples;
607    }
608 
609    samples_log2 = ffs(samples) - 1;
610 
611    assert(pass_att != VK_ATTACHMENT_UNUSED);
612 
613    if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
614       clear_value.depth = 1.0f;
615 
616    if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
617       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
618                             device->meta_state.clear_depth_unrestricted_p_layout,
619                             VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);
620    } else {
621       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
622                             device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
623                             4, &clear_value.depth);
624    }
625 
626    uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
627    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
628       radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
629    }
630 
631    VkPipeline pipeline =
632       pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
633                                  ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value);
634    if (!pipeline)
635       return;
636 
637    struct radv_subpass clear_subpass = {
638       .color_count = 0,
639       .color_attachments = NULL,
640       .depth_stencil_attachment = ds_att,
641    };
642 
643    if (ds_resolve_clear)
644       radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);
645 
646    radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
647 
648    if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop,
649                                  clear_rect, clear_value))
650       radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
651 
652    radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
653                        &(VkViewport){.x = clear_rect->rect.offset.x,
654                                      .y = clear_rect->rect.offset.y,
655                                      .width = clear_rect->rect.extent.width,
656                                      .height = clear_rect->rect.extent.height,
657                                      .minDepth = 0.0f,
658                                      .maxDepth = 1.0f});
659 
660    radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
661 
662    if (view_mask) {
663       u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
664    } else {
665       radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
666    }
667 
668    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
669       radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
670    }
671 
672    if (ds_resolve_clear)
673       radv_cmd_buffer_restore_subpass(cmd_buffer, subpass);
674 }
675 
676 static uint32_t
clear_htile_mask(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t htile_value,uint32_t htile_mask)677 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
678                  struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,
679                  uint32_t htile_mask)
680 {
681    struct radv_device *device = cmd_buffer->device;
682    struct radv_meta_state *state = &device->meta_state;
683    uint64_t block_count = round_up_u64(size, 1024);
684    struct radv_meta_saved_state saved_state;
685    struct radv_buffer dst_buffer;
686 
687    radv_meta_save(
688       &saved_state, cmd_buffer,
689       RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
690 
691    radv_buffer_init(&dst_buffer, device, bo, size, offset);
692 
693    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
694                         state->clear_htile_mask_pipeline);
695 
696    radv_meta_push_descriptor_set(
697       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
698       1, /* descriptorWriteCount */
699       (VkWriteDescriptorSet[]){
700          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
701           .dstBinding = 0,
702           .dstArrayElement = 0,
703           .descriptorCount = 1,
704           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
705           .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
706                                                    .offset = 0,
707                                                    .range = size}}});
708 
709    const unsigned constants[2] = {
710       htile_value & htile_mask,
711       ~htile_mask,
712    };
713 
714    radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
715                          VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);
716 
717    radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
718 
719    radv_buffer_finish(&dst_buffer);
720 
721    radv_meta_restore(&saved_state, cmd_buffer);
722 
723    return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
724           radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
725 }
726 
727 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device * device,const struct radv_image * image,VkClearDepthStencilValue value)728 radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
729                                 VkClearDepthStencilValue value)
730 {
731    uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
732    uint32_t zmask = 0, smem = 0;
733    uint32_t htile_value;
734    uint32_t zmin, zmax;
735 
736    /* Convert the depth value to 14-bit zmin/zmax values. */
737    zmin = lroundf(value.depth * max_zval);
738    zmax = zmin;
739 
740    if (radv_image_tile_stencil_disabled(device, image)) {
741       /* Z only (no stencil):
742        *
743        * |31     18|17      4|3     0|
744        * +---------+---------+-------+
745        * |  Max Z  |  Min Z  | ZMask |
746        */
747       htile_value = (((zmax  & 0x3fff) << 18) |
748                      ((zmin  & 0x3fff) <<  4) |
749                      ((zmask &    0xf) <<  0));
750    } else {
751 
752       /* Z and stencil:
753        *
754        * |31       12|11 10|9    8|7   6|5   4|3     0|
755        * +-----------+-----+------+-----+-----+-------+
756        * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
757        *
758        * Z, stencil, 4 bit VRS encoding:
759        * |31       12| 11      10 |9    8|7         6 |5   4|3     0|
760        * +-----------+------------+------+------------+-----+-------+
761        * |  Z Range  | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
762        */
763       uint32_t delta = 0;
764       uint32_t zrange = ((zmax << 6) | delta);
765       uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
766 
767       if (radv_image_has_vrs_htile(device, image))
768          sresults = 0x3;
769 
770       htile_value = (((zrange   & 0xfffff) << 12) |
771                      ((smem     & 0x3)     <<  8) |
772                      ((sresults & 0xf)     <<  4) |
773                      ((zmask    & 0xf)     <<  0));
774    }
775 
776    return htile_value;
777 }
778 
779 static uint32_t
radv_get_htile_mask(const struct radv_device * device,const struct radv_image * image,VkImageAspectFlags aspects)780 radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,
781                     VkImageAspectFlags aspects)
782 {
783    uint32_t mask = 0;
784 
785    if (radv_image_tile_stencil_disabled(device, image)) {
786       /* All the HTILE buffer is used when there is no stencil. */
787       mask = UINT32_MAX;
788    } else {
789       if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
790          mask |= 0xfffffc0f;
791       if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
792          mask |= 0x000003f0;
793    }
794 
795    return mask;
796 }
797 
798 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)799 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
800 {
801    return value.depth == 1.0f || value.depth == 0.0f;
802 }
803 
804 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)805 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
806 {
807    return value.stencil == 0;
808 }
809 
810 static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,bool in_render_loop,VkImageAspectFlags aspects,const VkClearRect * clear_rect,const VkClearDepthStencilValue clear_value,uint32_t view_mask)811 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
812                           VkImageLayout image_layout, bool in_render_loop,
813                           VkImageAspectFlags aspects, const VkClearRect *clear_rect,
814                           const VkClearDepthStencilValue clear_value, uint32_t view_mask)
815 {
816    if (!iview || !iview->support_fast_clear)
817       return false;
818 
819    if (!radv_layout_is_htile_compressed(
820           cmd_buffer->device, iview->image, image_layout, in_render_loop,
821           radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
822                                        cmd_buffer->qf)))
823       return false;
824 
825    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
826        clear_rect->rect.extent.width != iview->image->info.width ||
827        clear_rect->rect.extent.height != iview->image->info.height)
828       return false;
829 
830    if (view_mask && (iview->image->info.array_size >= 32 ||
831                      (1u << iview->image->info.array_size) - 1u != view_mask))
832       return false;
833    if (!view_mask && clear_rect->baseArrayLayer != 0)
834       return false;
835    if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
836       return false;
837 
838    if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted &&
839        (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
840        (clear_value.depth < 0.0 || clear_value.depth > 1.0))
841       return false;
842 
843    if (radv_image_is_tc_compat_htile(iview->image) &&
844        (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
845         ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
846          !radv_is_fast_clear_stencil_allowed(clear_value))))
847       return false;
848 
849    if (iview->image->info.levels > 1) {
850       uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
851       if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
852          /* Do not fast clears if one level can't be fast cleared. */
853          return false;
854       }
855    }
856 
857    return true;
858 }
859 
860 static void
radv_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)861 radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
862                       const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
863                       enum radv_cmd_flush_bits *post_flush)
864 {
865    VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
866    VkImageAspectFlags aspects = clear_att->aspectMask;
867    uint32_t clear_word, flush_bits;
868 
869    clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
870 
871    if (pre_flush) {
872       enum radv_cmd_flush_bits bits =
873          radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
874                                iview->image) |
875          radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT |
876                                            VK_ACCESS_2_SHADER_READ_BIT, iview->image);
877       cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
878       *pre_flush |= cmd_buffer->state.flush_bits;
879    }
880 
881    VkImageSubresourceRange range = {
882       .aspectMask = aspects,
883       .baseMipLevel = iview->vk.base_mip_level,
884       .levelCount = iview->vk.level_count,
885       .baseArrayLayer = iview->vk.base_array_layer,
886       .layerCount = iview->vk.layer_count,
887    };
888 
889    flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
890 
891    if (iview->image->planes[0].surface.has_stencil &&
892        !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
893       /* Synchronize after performing a depth-only or a stencil-only
894        * fast clear because the driver uses an optimized path which
895        * performs a read-modify-write operation, and the two separate
896        * aspects might use the same HTILE memory.
897        */
898       cmd_buffer->state.flush_bits |= flush_bits;
899    }
900 
901    radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
902    if (post_flush) {
903       *post_flush |= flush_bits;
904    }
905 }
906 
907 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)908 build_clear_htile_mask_shader(struct radv_device *dev)
909 {
910    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
911    b.shader->info.workgroup_size[0] = 64;
912 
913    nir_ssa_def *global_id = get_global_ids(&b, 1);
914 
915    nir_ssa_def *offset = nir_imul_imm(&b, global_id, 16);
916    offset = nir_channel(&b, offset, 0);
917 
918    nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
919 
920    nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
921 
922    nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
923 
924    /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
925    nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
926    data = nir_ior(&b, data, nir_channel(&b, constants, 0));
927 
928    nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
929 
930    return b.shader;
931 }
932 
933 static VkResult
init_meta_clear_htile_mask_state(struct radv_device * device)934 init_meta_clear_htile_mask_state(struct radv_device *device)
935 {
936    struct radv_meta_state *state = &device->meta_state;
937    VkResult result;
938    nir_shader *cs = build_clear_htile_mask_shader(device);
939 
940    VkDescriptorSetLayoutCreateInfo ds_layout_info = {
941       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
942       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
943       .bindingCount = 1,
944       .pBindings = (VkDescriptorSetLayoutBinding[]){
945          {.binding = 0,
946           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
947           .descriptorCount = 1,
948           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
949           .pImmutableSamplers = NULL},
950       }};
951 
952    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
953                                            &state->alloc, &state->clear_htile_mask_ds_layout);
954    if (result != VK_SUCCESS)
955       goto fail;
956 
957    VkPipelineLayoutCreateInfo p_layout_info = {
958       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
959       .setLayoutCount = 1,
960       .pSetLayouts = &state->clear_htile_mask_ds_layout,
961       .pushConstantRangeCount = 1,
962       .pPushConstantRanges =
963          &(VkPushConstantRange){
964             VK_SHADER_STAGE_COMPUTE_BIT,
965             0,
966             8,
967          },
968    };
969 
970    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
971                                       &state->clear_htile_mask_p_layout);
972    if (result != VK_SUCCESS)
973       goto fail;
974 
975    VkPipelineShaderStageCreateInfo shader_stage = {
976       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
977       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
978       .module = vk_shader_module_handle_from_nir(cs),
979       .pName = "main",
980       .pSpecializationInfo = NULL,
981    };
982 
983    VkComputePipelineCreateInfo pipeline_info = {
984       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
985       .stage = shader_stage,
986       .flags = 0,
987       .layout = state->clear_htile_mask_p_layout,
988    };
989 
990    result = radv_CreateComputePipelines(radv_device_to_handle(device),
991                                         radv_pipeline_cache_to_handle(&state->cache), 1,
992                                         &pipeline_info, NULL, &state->clear_htile_mask_pipeline);
993 
994 fail:
995    ralloc_free(cs);
996    return result;
997 }
998 
999 /* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
1000  * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
1001  */
1002 static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device * dev,bool is_msaa)1003 build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
1004 {
1005    enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
1006    const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
1007 
1008    nir_builder b =
1009       radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
1010                             is_msaa ? "multisampled" : "singlesampled");
1011    b.shader->info.workgroup_size[0] = 8;
1012    b.shader->info.workgroup_size[1] = 8;
1013 
1014    nir_ssa_def *global_id = get_global_ids(&b, 3);
1015 
1016    /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
1017    nir_ssa_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
1018 
1019    /* Compute the coordinates. */
1020    nir_ssa_def *coord = nir_channels(&b, global_id, 0x3);
1021    coord = nir_imul(&b, coord, dcc_block_size);
1022    coord = nir_vec4(&b, nir_channel(&b, coord, 0),
1023                         nir_channel(&b, coord, 1),
1024                         nir_channel(&b, global_id, 2),
1025                         nir_ssa_undef(&b, 1, 32));
1026 
1027    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
1028    output_img->data.descriptor_set = 0;
1029    output_img->data.binding = 0;
1030 
1031    /* Load the clear color values. */
1032    nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
1033 
1034    nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0),
1035                                     nir_channel(&b, clear_values, 1),
1036                                     nir_channel(&b, clear_values, 1),
1037                                     nir_channel(&b, clear_values, 1));
1038 
1039    /* Store the clear color values. */
1040    nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32);
1041    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
1042                          sample_id, data, nir_imm_int(&b, 0),
1043                          .image_dim = dim, .image_array = true);
1044 
1045    return b.shader;
1046 }
1047 
1048 static VkResult
create_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline)1049 create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline)
1050 {
1051    struct radv_meta_state *state = &device->meta_state;
1052    VkResult result;
1053    nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);
1054 
1055    VkPipelineShaderStageCreateInfo shader_stage = {
1056       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1057       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1058       .module = vk_shader_module_handle_from_nir(cs),
1059       .pName = "main",
1060       .pSpecializationInfo = NULL,
1061    };
1062 
1063    VkComputePipelineCreateInfo pipeline_info = {
1064       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1065       .stage = shader_stage,
1066       .flags = 0,
1067       .layout = state->clear_dcc_comp_to_single_p_layout,
1068    };
1069 
1070    result = radv_CreateComputePipelines(radv_device_to_handle(device),
1071                                         radv_pipeline_cache_to_handle(&state->cache), 1,
1072                                         &pipeline_info, NULL, pipeline);
1073 
1074    ralloc_free(cs);
1075    return result;
1076 }
1077 
1078 static VkResult
init_meta_clear_dcc_comp_to_single_state(struct radv_device * device)1079 init_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
1080 {
1081    struct radv_meta_state *state = &device->meta_state;
1082    VkResult result;
1083 
1084    VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1085       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1086       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1087       .bindingCount = 1,
1088       .pBindings = (VkDescriptorSetLayoutBinding[]){
1089          {.binding = 0,
1090           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1091           .descriptorCount = 1,
1092           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1093           .pImmutableSamplers = NULL},
1094       }};
1095 
1096    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
1097                                            &state->alloc, &state->clear_dcc_comp_to_single_ds_layout);
1098    if (result != VK_SUCCESS)
1099       goto fail;
1100 
1101    VkPipelineLayoutCreateInfo p_layout_info = {
1102       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1103       .setLayoutCount = 1,
1104       .pSetLayouts = &state->clear_dcc_comp_to_single_ds_layout,
1105       .pushConstantRangeCount = 1,
1106       .pPushConstantRanges =
1107          &(VkPushConstantRange){
1108             VK_SHADER_STAGE_COMPUTE_BIT,
1109             0,
1110             16,
1111          },
1112    };
1113 
1114    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
1115                                       &state->clear_dcc_comp_to_single_p_layout);
1116    if (result != VK_SUCCESS)
1117       goto fail;
1118 
1119    for (uint32_t i = 0; i < 2; i++) {
1120       result = create_dcc_comp_to_single_pipeline(device, !!i,
1121                                                   &state->clear_dcc_comp_to_single_pipeline[i]);
1122       if (result != VK_SUCCESS)
1123          goto fail;
1124    }
1125 
1126 fail:
1127    return result;
1128 }
1129 
1130 VkResult
radv_device_init_meta_clear_state(struct radv_device * device,bool on_demand)1131 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1132 {
1133    VkResult res;
1134    struct radv_meta_state *state = &device->meta_state;
1135 
1136    VkPipelineLayoutCreateInfo pl_color_create_info = {
1137       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1138       .setLayoutCount = 0,
1139       .pushConstantRangeCount = 1,
1140       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1141    };
1142 
1143    res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,
1144                                    &device->meta_state.alloc,
1145                                    &device->meta_state.clear_color_p_layout);
1146    if (res != VK_SUCCESS)
1147       return res;
1148 
1149    VkPipelineLayoutCreateInfo pl_depth_create_info = {
1150       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1151       .setLayoutCount = 0,
1152       .pushConstantRangeCount = 1,
1153       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1154    };
1155 
1156    res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,
1157                                    &device->meta_state.alloc,
1158                                    &device->meta_state.clear_depth_p_layout);
1159    if (res != VK_SUCCESS)
1160       return res;
1161 
1162    VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
1163       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1164       .setLayoutCount = 0,
1165       .pushConstantRangeCount = 1,
1166       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
1167    };
1168 
1169    res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1170                                    &pl_depth_unrestricted_create_info, &device->meta_state.alloc,
1171                                    &device->meta_state.clear_depth_unrestricted_p_layout);
1172    if (res != VK_SUCCESS)
1173       return res;
1174 
1175    res = init_meta_clear_htile_mask_state(device);
1176    if (res != VK_SUCCESS)
1177       return res;
1178 
1179    res = init_meta_clear_dcc_comp_to_single_state(device);
1180    if (res != VK_SUCCESS)
1181       return res;
1182 
1183    if (on_demand)
1184       return VK_SUCCESS;
1185 
1186    for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
1187       uint32_t samples = 1 << i;
1188 
1189       /* Only precompile meta pipelines for attachment 0 as other are uncommon. */
1190       for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1191          VkFormat format = radv_fs_key_format_exemplars[j];
1192          unsigned fs_key = radv_format_meta_fs_key(device, format);
1193          assert(!state->color_clear[i][0].color_pipelines[fs_key]);
1194 
1195          res = create_color_pipeline(device, samples, 0, format,
1196                                      &state->color_clear[i][0].color_pipelines[fs_key]);
1197          if (res != VK_SUCCESS)
1198             return res;
1199       }
1200    }
1201    for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
1202       uint32_t samples = 1 << i;
1203 
1204       for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1205          res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
1206                                             &state->ds_clear[i].depth_only_pipeline[j]);
1207          if (res != VK_SUCCESS)
1208             return res;
1209 
1210          res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1211                                             &state->ds_clear[i].stencil_only_pipeline[j]);
1212          if (res != VK_SUCCESS)
1213             return res;
1214 
1215          res = create_depthstencil_pipeline(
1216             device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1217             &state->ds_clear[i].depthstencil_pipeline[j]);
1218          if (res != VK_SUCCESS)
1219             return res;
1220 
1221          res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
1222                                             &state->ds_clear[i].depth_only_unrestricted_pipeline[j]);
1223          if (res != VK_SUCCESS)
1224             return res;
1225 
1226          res =
1227             create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1228                                          &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
1229          if (res != VK_SUCCESS)
1230             return res;
1231 
1232          res = create_depthstencil_pipeline(
1233             device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1234             &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
1235          if (res != VK_SUCCESS)
1236             return res;
1237       }
1238    }
1239    return VK_SUCCESS;
1240 }
1241 
1242 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1243 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1244 {
1245    uint32_t value = 0; /* Default value when no DCC. */
1246 
1247    /* The fast-clear value is different for images that have both DCC and
1248     * CMASK metadata.
1249     */
1250    if (radv_image_has_dcc(image)) {
1251       /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1252       return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
1253    }
1254 
1255    return value;
1256 }
1257 
1258 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1259 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1260                  const VkImageSubresourceRange *range, uint32_t value)
1261 {
1262    uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1263    uint64_t size;
1264 
1265    if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1266       /* TODO: clear layers. */
1267       size = image->planes[0].surface.cmask_size;
1268    } else {
1269       unsigned slice_size = image->planes[0].surface.cmask_slice_size;
1270 
1271       offset += slice_size * range->baseArrayLayer;
1272       size = slice_size * radv_get_layerCount(image, range);
1273    }
1274 
1275    return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1276          radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1277 }
1278 
1279 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1280 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1281                  const VkImageSubresourceRange *range, uint32_t value)
1282 {
1283    uint64_t offset = image->bindings[0].offset + image->planes[0].surface.fmask_offset;
1284    unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1285    uint64_t size;
1286 
1287    /* MSAA images do not support mipmap levels. */
1288    assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1);
1289 
1290    offset += slice_size * range->baseArrayLayer;
1291    size = slice_size * radv_get_layerCount(image, range);
1292 
1293    return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1294          radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1295 }
1296 
1297 uint32_t
radv_clear_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1298 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1299                const VkImageSubresourceRange *range, uint32_t value)
1300 {
1301    uint32_t level_count = radv_get_levelCount(image, range);
1302    uint32_t layer_count = radv_get_layerCount(image, range);
1303    uint32_t flush_bits = 0;
1304 
1305    /* Mark the image as being compressed. */
1306    radv_update_dcc_metadata(cmd_buffer, image, range, true);
1307 
1308    for (uint32_t l = 0; l < level_count; l++) {
1309       uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset;
1310       uint32_t level = range->baseMipLevel + l;
1311       uint64_t size;
1312 
1313       if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
1314          /* DCC for mipmaps+layers is currently disabled. */
1315          offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
1316                    image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1317          size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
1318       } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1319          /* Mipmap levels and layers aren't implemented. */
1320          assert(level == 0);
1321          size = image->planes[0].surface.meta_size;
1322       } else {
1323          const struct legacy_surf_dcc_level *dcc_level =
1324             &image->planes[0].surface.u.legacy.color.dcc_level[level];
1325 
1326          /* If dcc_fast_clear_size is 0 (which might happens for
1327           * mipmaps) the fill buffer operation below is a no-op.
1328           * This can only happen during initialization as the
1329           * fast clear path fallbacks to slow clears if one
1330           * level can't be fast cleared.
1331           */
1332          offset +=
1333             dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
1334          size = dcc_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
1335       }
1336 
1337       /* Do not clear this level if it can't be compressed. */
1338       if (!size)
1339          continue;
1340 
1341       flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1342                                      radv_buffer_get_va(image->bindings[0].bo) + offset,
1343                                      size, value);
1344    }
1345 
1346    return flush_bits;
1347 }
1348 
1349 static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t color_values[2])1350 radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer,
1351                               struct radv_image *image,
1352                               const VkImageSubresourceRange *range,
1353                               uint32_t color_values[2])
1354 {
1355    struct radv_device *device = cmd_buffer->device;
1356    unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
1357    unsigned layer_count = radv_get_layerCount(image, range);
1358    struct radv_meta_saved_state saved_state;
1359    bool is_msaa = image->info.samples > 1;
1360    struct radv_image_view iview;
1361    VkFormat format;
1362 
1363    switch (bytes_per_pixel) {
1364    case 1:
1365       format = VK_FORMAT_R8_UINT;
1366       break;
1367    case 2:
1368       format = VK_FORMAT_R16_UINT;
1369       break;
1370    case 4:
1371       format = VK_FORMAT_R32_UINT;
1372       break;
1373    case 8:
1374       format = VK_FORMAT_R32G32_UINT;
1375       break;
1376    case 16:
1377       format = VK_FORMAT_R32G32B32A32_UINT;
1378       break;
1379    default:
1380       unreachable("Unsupported number of bytes per pixel");
1381    }
1382 
1383    radv_meta_save(
1384       &saved_state, cmd_buffer,
1385       RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
1386 
1387    VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa];
1388 
1389    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1390                         pipeline);
1391 
1392    for (uint32_t l = 0; l < radv_get_levelCount(image, range); l++) {
1393       uint32_t width, height;
1394 
1395       /* Do not write the clear color value for levels without DCC. */
1396       if (!radv_dcc_enabled(image, range->baseMipLevel + l))
1397          continue;
1398 
1399       width = radv_minify(image->info.width, range->baseMipLevel + l);
1400       height = radv_minify(image->info.height, range->baseMipLevel + l);
1401 
1402       radv_image_view_init(
1403          &iview, cmd_buffer->device,
1404          &(VkImageViewCreateInfo){
1405             .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1406             .image = radv_image_to_handle(image),
1407             .viewType = VK_IMAGE_VIEW_TYPE_2D,
1408             .format = format,
1409             .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1410                                  .baseMipLevel = range->baseMipLevel + l,
1411                                  .levelCount = 1,
1412                                  .baseArrayLayer = range->baseArrayLayer,
1413                                  .layerCount = layer_count},
1414          },
1415          0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
1416 
1417       radv_meta_push_descriptor_set(
1418          cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1419          device->meta_state.clear_dcc_comp_to_single_p_layout, 0,
1420          1,
1421          (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1422                                    .dstBinding = 0,
1423                                    .dstArrayElement = 0,
1424                                    .descriptorCount = 1,
1425                                    .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1426                                    .pImageInfo =
1427                                       (VkDescriptorImageInfo[]){
1428                                          {
1429                                             .sampler = VK_NULL_HANDLE,
1430                                             .imageView = radv_image_view_to_handle(&iview),
1431                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1432                                          },
1433                                       }}});
1434 
1435       unsigned dcc_width =
1436          DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
1437       unsigned dcc_height =
1438          DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
1439 
1440       const unsigned constants[4] = {
1441          image->planes[0].surface.u.gfx9.color.dcc_block_width,
1442          image->planes[0].surface.u.gfx9.color.dcc_block_height,
1443          color_values[0],
1444          color_values[1],
1445       };
1446 
1447       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1448                             device->meta_state.clear_dcc_comp_to_single_p_layout,
1449                             VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants);
1450 
1451       radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);
1452 
1453       radv_image_view_finish(&iview);
1454    }
1455 
1456    radv_meta_restore(&saved_state, cmd_buffer);
1457 
1458    return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
1459           radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
1460 }
1461 
1462 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1463 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1464                  const VkImageSubresourceRange *range, uint32_t value)
1465 {
1466    uint32_t level_count = radv_get_levelCount(image, range);
1467    uint32_t flush_bits = 0;
1468    uint32_t htile_mask;
1469 
1470    htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
1471 
1472    if (level_count != image->info.levels) {
1473       assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
1474 
1475       /* Clear individuals levels separately. */
1476       for (uint32_t l = 0; l < level_count; l++) {
1477          uint32_t level = range->baseMipLevel + l;
1478          uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1479                            image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1480          uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;
1481 
1482          /* Do not clear this level if it can be compressed. */
1483          if (!size)
1484             continue;
1485 
1486          if (htile_mask == UINT_MAX) {
1487             /* Clear the whole HTILE buffer. */
1488             flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1489                                            radv_buffer_get_va(image->bindings[0].bo) + offset,
1490                                            size, value);
1491          } else {
1492             /* Only clear depth or stencil bytes in the HTILE buffer. */
1493             flush_bits |=
1494                clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1495          }
1496       }
1497    } else {
1498       unsigned layer_count = radv_get_layerCount(image, range);
1499       uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
1500       uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1501                         image->planes[0].surface.meta_slice_size * range->baseArrayLayer;
1502 
1503       if (htile_mask == UINT_MAX) {
1504          /* Clear the whole HTILE buffer. */
1505          flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1506                                        radv_buffer_get_va(image->bindings[0].bo) + offset,
1507                                        size, value);
1508       } else {
1509          /* Only clear depth or stencil bytes in the HTILE buffer. */
1510          flush_bits =
1511             clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1512       }
1513    }
1514 
1515    return flush_bits;
1516 }
1517 
1518 enum {
1519    RADV_DCC_CLEAR_0000 = 0x00000000U,
1520    RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
1521    RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
1522    RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
1523    RADV_DCC_GFX8_CLEAR_REG = 0x20202020U,
1524    RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
1525    RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
1526    RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
1527    RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
1528    RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
1529    RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
1530    RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
1531    RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
1532 };
1533 
1534 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1535 radv_dcc_single_clear_value(const struct radv_device *device)
1536 {
1537    return device->physical_device->rad_info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE
1538                                                                : RADV_DCC_GFX9_CLEAR_SINGLE;
1539 }
1540 
1541 static void
gfx8_get_fast_clear_parameters(struct radv_device * device,const struct radv_image_view * iview,const VkClearColorValue * clear_value,uint32_t * reset_value,bool * can_avoid_fast_clear_elim)1542 gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
1543                                const VkClearColorValue *clear_value, uint32_t *reset_value,
1544                                bool *can_avoid_fast_clear_elim)
1545 {
1546    bool values[4] = {0};
1547    int extra_channel;
1548    bool main_value = false;
1549    bool extra_value = false;
1550    bool has_color = false;
1551    bool has_alpha = false;
1552 
1553    /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
1554    if (iview->image->support_comp_to_single) {
1555       *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
1556       *can_avoid_fast_clear_elim = true;
1557    } else {
1558       *reset_value = RADV_DCC_GFX8_CLEAR_REG;
1559       *can_avoid_fast_clear_elim = false;
1560    }
1561 
1562    const struct util_format_description *desc = vk_format_description(iview->vk.format);
1563    if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
1564        iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 || iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1565       extra_channel = -1;
1566    else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1567       if (vi_alpha_is_on_msb(device, iview->vk.format))
1568          extra_channel = desc->nr_channels - 1;
1569       else
1570          extra_channel = 0;
1571    } else
1572       return;
1573 
1574    for (int i = 0; i < 4; i++) {
1575       int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
1576       if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
1577          continue;
1578 
1579       if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1580          /* Use the maximum value for clamping the clear color. */
1581          int max = u_bit_consecutive(0, desc->channel[i].size - 1);
1582 
1583          values[i] = clear_value->int32[i] != 0;
1584          if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
1585             return;
1586       } else if (desc->channel[i].pure_integer &&
1587                  desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1588          /* Use the maximum value for clamping the clear color. */
1589          unsigned max = u_bit_consecutive(0, desc->channel[i].size);
1590 
1591          values[i] = clear_value->uint32[i] != 0U;
1592          if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
1593             return;
1594       } else {
1595          values[i] = clear_value->float32[i] != 0.0F;
1596          if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
1597             return;
1598       }
1599 
1600       if (index == extra_channel) {
1601          extra_value = values[i];
1602          has_alpha = true;
1603       } else {
1604          main_value = values[i];
1605          has_color = true;
1606       }
1607    }
1608 
1609    /* If alpha isn't present, make it the same as color, and vice versa. */
1610    if (!has_alpha)
1611       extra_value = main_value;
1612    else if (!has_color)
1613       main_value = extra_value;
1614 
1615    for (int i = 0; i < 4; ++i)
1616       if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
1617           desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
1618          return;
1619 
1620    /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
1621    if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
1622       return;
1623 
1624    *can_avoid_fast_clear_elim = true;
1625 
1626    if (main_value) {
1627       if (extra_value)
1628          *reset_value = RADV_DCC_GFX8_CLEAR_1111;
1629       else
1630          *reset_value = RADV_DCC_GFX8_CLEAR_1110;
1631    } else {
1632       if (extra_value)
1633          *reset_value = RADV_DCC_GFX8_CLEAR_0001;
1634       else
1635          *reset_value = RADV_DCC_CLEAR_0000;
1636    }
1637 }
1638 
1639 static bool
gfx11_get_fast_clear_parameters(struct radv_device * device,const struct radv_image_view * iview,const VkClearColorValue * clear_value,uint32_t * reset_value)1640 gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
1641                                 const VkClearColorValue *clear_value, uint32_t *reset_value)
1642 {
1643    int extra_channel;
1644 
1645    bool all_bits_are_0 = true;
1646    bool all_bits_are_1 = true;
1647    bool all_words_are_fp16_1 = true;
1648    bool all_words_are_fp32_1 = true;
1649    bool unorm_0001 = true;
1650    bool unorm_1110 = true;
1651 
1652    const struct util_format_description *desc = vk_format_description(iview->vk.format);
1653    if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
1654        iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
1655        iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1656       extra_channel = -1;
1657    else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1658       if (vi_alpha_is_on_msb(device, iview->vk.format))
1659          extra_channel = desc->nr_channels - 1;
1660       else
1661          extra_channel = 0;
1662    } else
1663       return false;
1664 
1665    for (int i = 0; i < 4; i++) {
1666       int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
1667       if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
1668          continue;
1669 
1670       uint32_t extra_xor = index == extra_channel ? ~0u : 0;
1671       if (clear_value->uint32[i] & ((1u << desc->channel[i].size) - 1))
1672          all_bits_are_0 = false;
1673       if (~clear_value->uint32[i] & ((1u << desc->channel[i].size) - 1))
1674          all_bits_are_1 = false;
1675       if (desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT || desc->channel[i].size != 16 ||
1676           clear_value->float32[i] != 1.0)
1677          all_words_are_fp16_1 = false;
1678       if (desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT || desc->channel[i].size != 32 ||
1679           clear_value->float32[i] != 1.0)
1680          all_words_are_fp32_1 = false;
1681       if ((clear_value->uint32[i] ^ extra_xor) & ((1u << desc->channel[i].size) - 1))
1682          unorm_0001 = false;
1683       if ((~clear_value->uint32[i] ^ extra_xor) & ((1u << desc->channel[i].size) - 1))
1684          unorm_1110 = false;
1685    }
1686 
1687    if (all_bits_are_0)
1688       *reset_value = RADV_DCC_CLEAR_0000;
1689    else if (all_bits_are_1)
1690       *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
1691    else if (all_words_are_fp16_1)
1692       *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
1693    else if (all_words_are_fp32_1)
1694       *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
1695    else if (unorm_0001)
1696       *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
1697    else if (unorm_1110)
1698       *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
1699    else if (iview->image->support_comp_to_single)
1700       *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
1701    else
1702       return false;
1703 
1704    return true;
1705 }
1706 
1707 static bool
radv_can_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearColorValue clear_value,uint32_t view_mask)1708 radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
1709                           VkImageLayout image_layout, bool in_render_loop,
1710                           const VkClearRect *clear_rect, VkClearColorValue clear_value,
1711                           uint32_t view_mask)
1712 {
1713    uint32_t clear_color[2];
1714 
1715    if (!iview || !iview->support_fast_clear)
1716       return false;
1717 
1718    if (!radv_layout_can_fast_clear(
1719           cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout, in_render_loop,
1720           radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
1721                                        cmd_buffer->qf)))
1722       return false;
1723 
1724    if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1725        clear_rect->rect.extent.width != iview->image->info.width ||
1726        clear_rect->rect.extent.height != iview->image->info.height)
1727       return false;
1728 
1729    if (view_mask && (iview->image->info.array_size >= 32 ||
1730                      (1u << iview->image->info.array_size) - 1u != view_mask))
1731       return false;
1732    if (!view_mask && clear_rect->baseArrayLayer != 0)
1733       return false;
1734    if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
1735       return false;
1736 
1737    /* DCC */
1738    if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
1739       return false;
1740 
1741    /* Images that support comp-to-single clears don't have clear values. */
1742    if (!iview->image->support_comp_to_single &&
1743        !radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
1744       return false;
1745 
1746    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
1747       bool can_avoid_fast_clear_elim;
1748       uint32_t reset_value;
1749 
1750       if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1751          if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value,
1752                                               &reset_value))
1753             return false;
1754       } else {
1755          gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
1756                                         &can_avoid_fast_clear_elim);
1757       }
1758 
1759       if (iview->image->info.levels > 1) {
1760          if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
1761             uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
1762             if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
1763                /* Do not fast clears if one level can't be fast cleard. */
1764                return false;
1765             }
1766          } else {
1767             for (uint32_t l = 0; l < iview->vk.level_count; l++) {
1768                uint32_t level = iview->vk.base_mip_level + l;
1769                struct legacy_surf_dcc_level *dcc_level =
1770                   &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];
1771 
1772                /* Do not fast clears if one level can't be
1773                 * fast cleared.
1774                 */
1775                if (!dcc_level->dcc_fast_clear_size)
1776                   return false;
1777             }
1778          }
1779       }
1780    }
1781 
1782    return true;
1783 }
1784 
1785 static void
radv_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,const VkClearAttachment * clear_att,uint32_t subpass_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1786 radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
1787                       const VkClearAttachment *clear_att, uint32_t subpass_att,
1788                       enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
1789 {
1790    VkClearColorValue clear_value = clear_att->clearValue.color;
1791    uint32_t clear_color[2], flush_bits = 0;
1792    uint32_t cmask_clear_value;
1793    VkImageSubresourceRange range = {
1794       .aspectMask = iview->vk.aspects,
1795       .baseMipLevel = iview->vk.base_mip_level,
1796       .levelCount = iview->vk.level_count,
1797       .baseArrayLayer = iview->vk.base_array_layer,
1798       .layerCount = iview->vk.layer_count,
1799    };
1800 
1801    if (pre_flush) {
1802       enum radv_cmd_flush_bits bits =
1803          radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
1804          radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, iview->image);
1805       cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
1806       *pre_flush |= cmd_buffer->state.flush_bits;
1807    }
1808 
1809    /* DCC */
1810    radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);
1811 
1812    cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
1813 
1814    /* clear cmask buffer */
1815    bool need_decompress_pass = false;
1816    if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
1817       uint32_t reset_value;
1818       bool can_avoid_fast_clear_elim = true;
1819 
1820       if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1821          ASSERTED bool result =
1822             gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
1823          assert(result);
1824       } else {
1825          gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
1826                                         &can_avoid_fast_clear_elim);
1827       }
1828 
1829       if (radv_image_has_cmask(iview->image)) {
1830          flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
1831       }
1832 
1833       if (!can_avoid_fast_clear_elim)
1834          need_decompress_pass = true;
1835 
1836       flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);
1837 
1838       if (reset_value == radv_dcc_single_clear_value(cmd_buffer->device)) {
1839          /* Write the clear color to the first byte of each 256B block when the image supports DCC
1840           * fast clears with comp-to-single.
1841           */
1842          flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
1843       }
1844    } else {
1845       flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
1846 
1847       /* Fast clearing with CMASK should always be eliminated. */
1848       need_decompress_pass = true;
1849    }
1850 
1851    if (post_flush) {
1852       *post_flush |= flush_bits;
1853    }
1854 
1855    /* Update the FCE predicate to perform a fast-clear eliminate. */
1856    radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);
1857 
1858    radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color);
1859 }
1860 
1861 /**
1862  * The parameters mean that same as those in vkCmdClearAttachments.
1863  */
1864 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask,bool ds_resolve_clear)1865 emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1866            const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,
1867            enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear)
1868 {
1869    const struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
1870    const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1871    VkImageAspectFlags aspects = clear_att->aspectMask;
1872 
1873    if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1874       const uint32_t subpass_att = clear_att->colorAttachment;
1875       assert(subpass_att < subpass->color_count);
1876       const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
1877       if (pass_att == VK_ATTACHMENT_UNUSED)
1878          return;
1879 
1880       VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
1881       bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
1882       const struct radv_image_view *iview =
1883          fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
1884       VkClearColorValue clear_value = clear_att->clearValue.color;
1885 
1886       if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect,
1887                                     clear_value, view_mask)) {
1888          radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush);
1889       } else {
1890          emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1891       }
1892    } else {
1893       struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
1894 
1895       if (ds_resolve_clear)
1896          ds_att = subpass->ds_resolve_attachment;
1897 
1898       if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
1899          return;
1900 
1901       VkImageLayout image_layout = ds_att->layout;
1902       bool in_render_loop = ds_att->in_render_loop;
1903       const struct radv_image_view *iview =
1904          fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
1905       VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1906 
1907       assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1908 
1909       if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects,
1910                                     clear_rect, clear_value, view_mask)) {
1911          radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush);
1912       } else {
1913          emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask,
1914                                  ds_resolve_clear);
1915       }
1916    }
1917 }
1918 
1919 static inline bool
radv_attachment_needs_clear(struct radv_cmd_state * cmd_state,uint32_t a)1920 radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
1921 {
1922    uint32_t view_mask = cmd_state->subpass->view_mask;
1923    return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects &&
1924            (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
1925 }
1926 
1927 static bool
radv_subpass_needs_clear(struct radv_cmd_buffer * cmd_buffer)1928 radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
1929 {
1930    struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1931    uint32_t a;
1932 
1933    if (!cmd_state->subpass)
1934       return false;
1935 
1936    for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1937       a = cmd_state->subpass->color_attachments[i].attachment;
1938       if (radv_attachment_needs_clear(cmd_state, a))
1939          return true;
1940    }
1941 
1942    if (cmd_state->subpass->depth_stencil_attachment) {
1943       a = cmd_state->subpass->depth_stencil_attachment->attachment;
1944       if (radv_attachment_needs_clear(cmd_state, a))
1945          return true;
1946    }
1947 
1948    if (!cmd_state->subpass->ds_resolve_attachment)
1949       return false;
1950 
1951    a = cmd_state->subpass->ds_resolve_attachment->attachment;
1952    return radv_attachment_needs_clear(cmd_state, a);
1953 }
1954 
1955 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,struct radv_attachment_state * attachment,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,bool ds_resolve_clear)1956 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
1957                               struct radv_attachment_state *attachment,
1958                               const VkClearAttachment *clear_att,
1959                               enum radv_cmd_flush_bits *pre_flush,
1960                               enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear)
1961 {
1962    struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1963    uint32_t view_mask = cmd_state->subpass->view_mask;
1964 
1965    VkClearRect clear_rect = {
1966       .rect = cmd_state->render_area,
1967       .baseArrayLayer = 0,
1968       .layerCount = cmd_state->framebuffer->layers,
1969    };
1970 
1971    radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1972 
1973    emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
1974               view_mask & ~attachment->cleared_views, ds_resolve_clear);
1975    if (view_mask)
1976       attachment->cleared_views |= view_mask;
1977    else
1978       attachment->pending_clear_aspects = 0;
1979 
1980    radv_describe_end_render_pass_clear(cmd_buffer);
1981 }
1982 
1983 /**
1984  * Emit any pending attachment clears for the current subpass.
1985  *
1986  * @see radv_attachment_state::pending_clear_aspects
1987  */
1988 void
radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer * cmd_buffer)1989 radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
1990 {
1991    struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1992    struct radv_meta_saved_state saved_state;
1993    enum radv_cmd_flush_bits pre_flush = 0;
1994    enum radv_cmd_flush_bits post_flush = 0;
1995 
1996    if (!radv_subpass_needs_clear(cmd_buffer))
1997       return;
1998 
1999    /* Subpass clear should not be affected by conditional rendering. */
2000    radv_meta_save(
2001       &saved_state, cmd_buffer,
2002       RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2003 
2004    for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
2005       uint32_t a = cmd_state->subpass->color_attachments[i].attachment;
2006 
2007       if (!radv_attachment_needs_clear(cmd_state, a))
2008          continue;
2009 
2010       assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);
2011 
2012       VkClearAttachment clear_att = {
2013          .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
2014          .colorAttachment = i, /* Use attachment index relative to subpass */
2015          .clearValue = cmd_state->attachments[a].clear_value,
2016       };
2017 
2018       radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush,
2019                                     &post_flush, false);
2020    }
2021 
2022    if (cmd_state->subpass->depth_stencil_attachment) {
2023       uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
2024       if (radv_attachment_needs_clear(cmd_state, ds)) {
2025          VkClearAttachment clear_att = {
2026             .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
2027             .clearValue = cmd_state->attachments[ds].clear_value,
2028          };
2029 
2030          radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att,
2031                                        &pre_flush, &post_flush, false);
2032       }
2033    }
2034 
2035    if (cmd_state->subpass->ds_resolve_attachment) {
2036       uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
2037       if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
2038          VkClearAttachment clear_att = {
2039             .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
2040             .clearValue = cmd_state->attachments[ds_resolve].clear_value,
2041          };
2042 
2043          radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att,
2044                                        &pre_flush, &post_flush, true);
2045       }
2046    }
2047 
2048    radv_meta_restore(&saved_state, cmd_buffer);
2049    cmd_buffer->state.flush_bits |= post_flush;
2050 }
2051 
2052 static void
radv_clear_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkImageSubresourceRange * range,VkFormat format,int level,unsigned layer_count,const VkClearValue * clear_val)2053 radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2054                        VkImageLayout image_layout, const VkImageSubresourceRange *range,
2055                        VkFormat format, int level, unsigned layer_count,
2056                        const VkClearValue *clear_val)
2057 {
2058    struct radv_image_view iview;
2059    uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
2060    uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);
2061 
2062    radv_image_view_init(&iview, cmd_buffer->device,
2063                         &(VkImageViewCreateInfo){
2064                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2065                            .image = radv_image_to_handle(image),
2066                            .viewType = radv_meta_get_view_type(image),
2067                            .format = format,
2068                            .subresourceRange = {.aspectMask = range->aspectMask,
2069                                                 .baseMipLevel = range->baseMipLevel + level,
2070                                                 .levelCount = 1,
2071                                                 .baseArrayLayer = range->baseArrayLayer,
2072                                                 .layerCount = layer_count},
2073                         },
2074                         0, NULL);
2075 
2076    VkClearAttachment clear_att = {
2077       .aspectMask = range->aspectMask,
2078       .colorAttachment = 0,
2079       .clearValue = *clear_val,
2080    };
2081 
2082    VkClearRect clear_rect = {
2083       .rect =
2084          {
2085             .offset = {0, 0},
2086             .extent = {width, height},
2087          },
2088       .baseArrayLayer = 0,
2089       .layerCount = layer_count,
2090    };
2091 
2092    VkRenderingAttachmentInfo color_att = {0}, depth_att = {0}, stencil_att = {0};
2093 
2094    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
2095       color_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
2096       color_att.imageView = radv_image_view_to_handle(&iview);
2097       color_att.imageLayout = image_layout;
2098       color_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
2099       color_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
2100    } else {
2101       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
2102          depth_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
2103          depth_att.imageView = radv_image_view_to_handle(&iview);
2104          depth_att.imageLayout = image_layout;
2105          depth_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
2106          depth_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
2107       }
2108 
2109       if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
2110          stencil_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
2111          stencil_att.imageView = radv_image_view_to_handle(&iview);
2112          stencil_att.imageLayout = image_layout;
2113          stencil_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
2114          stencil_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
2115       }
2116    }
2117 
2118    VkRenderingInfo rendering_info = {
2119       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
2120       .renderArea = {
2121          .offset = { 0, 0 },
2122          .extent = { width, height },
2123       },
2124       .layerCount = layer_count,
2125    };
2126 
2127    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
2128       rendering_info.colorAttachmentCount = 1;
2129       rendering_info.pColorAttachments = &color_att;
2130    } else {
2131       if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
2132          rendering_info.pDepthAttachment = &depth_att;
2133       if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
2134          rendering_info.pStencilAttachment = &stencil_att;
2135    }
2136 
2137    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
2138 
2139    emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);
2140 
2141    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
2142 
2143    radv_image_view_finish(&iview);
2144 }
2145 
2146 /**
2147  * Return TRUE if a fast color or depth clear has been performed.
2148  */
2149 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,bool in_render_loop,const VkImageSubresourceRange * range,const VkClearValue * clear_val)2150 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
2151                       VkImageLayout image_layout, bool in_render_loop,
2152                       const VkImageSubresourceRange *range, const VkClearValue *clear_val)
2153 {
2154    struct radv_image_view iview;
2155    bool fast_cleared = false;
2156 
2157    radv_image_view_init(&iview, cmd_buffer->device,
2158                         &(VkImageViewCreateInfo){
2159                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2160                            .image = radv_image_to_handle(image),
2161                            .viewType = radv_meta_get_view_type(image),
2162                            .format = image->vk.format,
2163                            .subresourceRange =
2164                               {
2165                                  .aspectMask = range->aspectMask,
2166                                  .baseMipLevel = range->baseMipLevel,
2167                                  .levelCount = range->levelCount,
2168                                  .baseArrayLayer = range->baseArrayLayer,
2169                                  .layerCount = range->layerCount,
2170                               },
2171                         },
2172                         0, NULL);
2173 
2174    VkClearRect clear_rect = {
2175       .rect =
2176          {
2177             .offset = {0, 0},
2178             .extent =
2179                {
2180                   radv_minify(image->info.width, range->baseMipLevel),
2181                   radv_minify(image->info.height, range->baseMipLevel),
2182                },
2183          },
2184       .baseArrayLayer = range->baseArrayLayer,
2185       .layerCount = range->layerCount,
2186    };
2187 
2188    VkClearAttachment clear_att = {
2189       .aspectMask = range->aspectMask,
2190       .colorAttachment = 0,
2191       .clearValue = *clear_val,
2192    };
2193 
2194    if (vk_format_is_color(format)) {
2195       if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect,
2196                                     clear_att.clearValue.color, 0)) {
2197          radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL,
2198                                NULL);
2199          fast_cleared = true;
2200       }
2201    } else {
2202       if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop,
2203                                     range->aspectMask, &clear_rect,
2204                                     clear_att.clearValue.depthStencil, 0)) {
2205          radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);
2206          fast_cleared = true;
2207       }
2208    }
2209 
2210    radv_image_view_finish(&iview);
2211    return fast_cleared;
2212 }
2213 
2214 static void
radv_cmd_clear_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkClearValue * clear_value,uint32_t range_count,const VkImageSubresourceRange * ranges,bool cs)2215 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2216                      VkImageLayout image_layout, const VkClearValue *clear_value,
2217                      uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)
2218 {
2219    VkFormat format = image->vk.format;
2220    VkClearValue internal_clear_value;
2221 
2222    if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
2223       internal_clear_value.color = clear_value->color;
2224    else
2225       internal_clear_value.depthStencil = clear_value->depthStencil;
2226 
2227    bool disable_compression = false;
2228 
2229    if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2230       bool blendable;
2231       if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
2232              : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format,
2233                                                      &blendable)) {
2234          format = VK_FORMAT_R32_UINT;
2235          internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);
2236 
2237          uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
2238                                                             cmd_buffer->qf);
2239 
2240          for (uint32_t r = 0; r < range_count; r++) {
2241             const VkImageSubresourceRange *range = &ranges[r];
2242 
2243             /* Don't use compressed image stores because they will use an incompatible format. */
2244             if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
2245                                            image_layout, false, queue_mask)) {
2246                disable_compression = cs;
2247                break;
2248             }
2249          }
2250       }
2251    }
2252 
2253    if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
2254       uint8_t r, g;
2255       format = VK_FORMAT_R8_UINT;
2256       r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
2257       g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
2258       internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
2259    }
2260 
2261    for (uint32_t r = 0; r < range_count; r++) {
2262       const VkImageSubresourceRange *range = &ranges[r];
2263 
2264       /* Try to perform a fast clear first, otherwise fallback to
2265        * the legacy path.
2266        */
2267       if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range,
2268                                        &internal_clear_value)) {
2269          continue;
2270       }
2271 
2272       for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
2273          const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
2274                                          ? radv_minify(image->info.depth, range->baseMipLevel + l)
2275                                          : radv_get_layerCount(image, range);
2276 
2277          if (cs) {
2278             for (uint32_t s = 0; s < layer_count; ++s) {
2279                struct radv_meta_blit2d_surf surf;
2280                surf.format = format;
2281                surf.image = image;
2282                surf.level = range->baseMipLevel + l;
2283                surf.layer = range->baseArrayLayer + s;
2284                surf.aspect_mask = range->aspectMask;
2285                surf.disable_compression = disable_compression;
2286                radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
2287             }
2288          } else {
2289             assert(!disable_compression);
2290             radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
2291                                    &internal_clear_value);
2292          }
2293       }
2294    }
2295 
2296    if (disable_compression) {
2297       enum radv_cmd_flush_bits flush_bits = 0;
2298       for (unsigned i = 0; i < range_count; i++) {
2299          if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
2300             flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
2301       }
2302       cmd_buffer->state.flush_bits |= flush_bits;
2303    }
2304 }
2305 
2306 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2307 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2308                         const VkClearColorValue *pColor, uint32_t rangeCount,
2309                         const VkImageSubresourceRange *pRanges)
2310 {
2311    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2312    RADV_FROM_HANDLE(radv_image, image, image_h);
2313    struct radv_meta_saved_state saved_state;
2314    bool cs;
2315 
2316    cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
2317         !radv_image_is_renderable(cmd_buffer->device, image);
2318 
2319    /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2320     */
2321    enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2322    if (cs)
2323       save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2324    else
2325       save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2326 
2327    radv_meta_save(&saved_state, cmd_buffer, save_flags);
2328 
2329    radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,
2330                         pRanges, cs);
2331 
2332    radv_meta_restore(&saved_state, cmd_buffer);
2333 }
2334 
2335 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2336 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
2337                                VkImageLayout imageLayout,
2338                                const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2339                                const VkImageSubresourceRange *pRanges)
2340 {
2341    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2342    RADV_FROM_HANDLE(radv_image, image, image_h);
2343    struct radv_meta_saved_state saved_state;
2344 
2345    /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2346    radv_meta_save(
2347       &saved_state, cmd_buffer,
2348       RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2349 
2350    radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,
2351                         rangeCount, pRanges, false);
2352 
2353    radv_meta_restore(&saved_state, cmd_buffer);
2354 }
2355 
2356 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2357 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
2358                          const VkClearAttachment *pAttachments, uint32_t rectCount,
2359                          const VkClearRect *pRects)
2360 {
2361    RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2362    struct radv_meta_saved_state saved_state;
2363    enum radv_cmd_flush_bits pre_flush = 0;
2364    enum radv_cmd_flush_bits post_flush = 0;
2365 
2366    if (!cmd_buffer->state.subpass)
2367       return;
2368 
2369    radv_meta_save(&saved_state, cmd_buffer,
2370                   RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2371 
2372    /* FINISHME: We can do better than this dumb loop. It thrashes too much
2373     * state.
2374     */
2375    for (uint32_t a = 0; a < attachmentCount; ++a) {
2376       for (uint32_t r = 0; r < rectCount; ++r) {
2377          emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2378                     cmd_buffer->state.subpass->view_mask, false);
2379       }
2380    }
2381 
2382    radv_meta_restore(&saved_state, cmd_buffer);
2383    cmd_buffer->state.flush_bits |= post_flush;
2384 }
2385