1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/nir_builder.h"
25 #include "radv_debug.h"
26 #include "radv_meta.h"
27 #include "radv_private.h"
28
29 #include "util/format_rgb9e5.h"
30 #include "vk_format.h"
31
/* Index into the depth/stencil clear pipeline arrays: SLOW clears by writing
 * the value per-sample, FAST additionally programs the DB clear state (see
 * extra.db_depth_clear / db_stencil_clear in create_depthstencil_pipeline). */
enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
33
34 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)35 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
36 uint32_t frag_output)
37 {
38 nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
39 nir_builder fs_b =
40 radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
41
42 const struct glsl_type *position_type = glsl_vec4_type();
43 const struct glsl_type *color_type = glsl_vec4_type();
44
45 nir_variable *vs_out_pos =
46 nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
47 vs_out_pos->data.location = VARYING_SLOT_POS;
48
49 nir_ssa_def *in_color_load =
50 nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
51
52 nir_variable *fs_out_color =
53 nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
54 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
55
56 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
57
58 nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
59 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
60
61 const struct glsl_type *layer_type = glsl_int_type();
62 nir_variable *vs_out_layer =
63 nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
64 vs_out_layer->data.location = VARYING_SLOT_LAYER;
65 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
66 nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
67 nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);
68
69 nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
70 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
71
72 *out_vs = vs_b.shader;
73 *out_fs = fs_b.shader;
74 }
75
/* Common helper that assembles a meta clear graphics pipeline from the
 * pre-built shaders and per-variant fixed-function state. Consumes (frees)
 * both NIR shaders regardless of success. Viewport, scissor and stencil
 * reference are left dynamic so one pipeline serves any clear rectangle. */
static VkResult
create_pipeline(struct radv_device *device, uint32_t samples,
                struct nir_shader *vs_nir, struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineRenderingCreateInfo *dyn_state,
                const VkPipelineLayout layout,
                const struct radv_graphics_pipeline_create_info *extra,
                const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
{
   VkDevice device_h = radv_device_to_handle(device);
   VkResult result;

   result = radv_graphics_pipeline_create(
      device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
      &(VkGraphicsPipelineCreateInfo){
         .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
         .pNext = dyn_state,
         /* VS-only when there is no fragment shader (depth-only clears can
          * write depth from the VS position). */
         .stageCount = fs_nir ? 2 : 1,
         .pStages =
            (VkPipelineShaderStageCreateInfo[]){
               {
                  .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                  .stage = VK_SHADER_STAGE_VERTEX_BIT,
                  .module = vk_shader_module_handle_from_nir(vs_nir),
                  .pName = "main",
               },
               {
                  .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                  .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                  .module = vk_shader_module_handle_from_nir(fs_nir),
                  .pName = "main",
               },
            },
         /* Clears never consume vertex buffers; positions are generated. */
         .pVertexInputState = vi_state,
         .pInputAssemblyState =
            &(VkPipelineInputAssemblyStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
               .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
               .primitiveRestartEnable = false,
            },
         .pViewportState =
            &(VkPipelineViewportStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
               .viewportCount = 1,
               .scissorCount = 1,
            },
         .pRasterizationState =
            &(VkPipelineRasterizationStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
               .rasterizerDiscardEnable = false,
               .polygonMode = VK_POLYGON_MODE_FILL,
               .cullMode = VK_CULL_MODE_NONE,
               .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
               .depthBiasEnable = false,
               .depthBiasConstantFactor = 0.0f,
               .depthBiasClamp = 0.0f,
               .depthBiasSlopeFactor = 0.0f,
               .lineWidth = 1.0f,
            },
         .pMultisampleState =
            &(VkPipelineMultisampleStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
               .rasterizationSamples = samples,
               .sampleShadingEnable = false,
               .pSampleMask = NULL,
               .alphaToCoverageEnable = false,
               .alphaToOneEnable = false,
            },
         .pDepthStencilState = ds_state,
         .pColorBlendState = cb_state,
         /* Stencil reference is dynamic so stencil clears can reuse the
          * pipeline for any clear value (harmless for color clears). */
         .pDynamicState =
            &(VkPipelineDynamicStateCreateInfo){
               .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
               .dynamicStateCount = 3,
               .pDynamicStates =
                  (VkDynamicState[]){
                     VK_DYNAMIC_STATE_VIEWPORT,
                     VK_DYNAMIC_STATE_SCISSOR,
                     VK_DYNAMIC_STATE_STENCIL_REFERENCE,
                  },
            },
         .layout = layout,
         .flags = 0,
         /* Dynamic rendering: attachment formats come from dyn_state. */
         .renderPass = VK_NULL_HANDLE,
         .subpass = 0,
      },
      extra, alloc, pipeline);

   /* The shaders were cloned into the pipeline; release our references. */
   ralloc_free(vs_nir);
   ralloc_free(fs_nir);

   return result;
}
171
172 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline)173 create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,
174 VkFormat format, VkPipeline *pipeline)
175 {
176 struct nir_shader *vs_nir;
177 struct nir_shader *fs_nir;
178 VkResult result;
179
180 mtx_lock(&device->meta_state.mtx);
181 if (*pipeline) {
182 mtx_unlock(&device->meta_state.mtx);
183 return VK_SUCCESS;
184 }
185
186 build_color_shaders(device, &vs_nir, &fs_nir, frag_output);
187
188 const VkPipelineVertexInputStateCreateInfo vi_state = {
189 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
190 .vertexBindingDescriptionCount = 0,
191 .vertexAttributeDescriptionCount = 0,
192 };
193
194 const VkPipelineDepthStencilStateCreateInfo ds_state = {
195 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
196 .depthTestEnable = false,
197 .depthWriteEnable = false,
198 .depthBoundsTestEnable = false,
199 .stencilTestEnable = false,
200 .minDepthBounds = 0.0f,
201 .maxDepthBounds = 1.0f,
202 };
203
204 VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
205 blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
206 .blendEnable = false,
207 .colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |
208 VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
209 };
210
211 const VkPipelineColorBlendStateCreateInfo cb_state = {
212 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
213 .logicOpEnable = false,
214 .attachmentCount = MAX_RTS,
215 .pAttachments = blend_attachment_state,
216 .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f }};
217
218 VkFormat att_formats[MAX_RTS] = { 0 };
219 att_formats[frag_output] = format;
220
221 const VkPipelineRenderingCreateInfo rendering_create_info = {
222 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
223 .colorAttachmentCount = MAX_RTS,
224 .pColorAttachmentFormats = att_formats,
225 };
226
227 struct radv_graphics_pipeline_create_info extra = {
228 .use_rectlist = true,
229 };
230 result =
231 create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
232 &rendering_create_info, device->meta_state.clear_color_p_layout,
233 &extra, &device->meta_state.alloc, pipeline);
234
235 mtx_unlock(&device->meta_state.mtx);
236 return result;
237 }
238
239 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)240 finish_meta_clear_htile_mask_state(struct radv_device *device)
241 {
242 struct radv_meta_state *state = &device->meta_state;
243
244 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,
245 &state->alloc);
246 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,
247 &state->alloc);
248 device->vk.dispatch_table.DestroyDescriptorSetLayout(
249 radv_device_to_handle(device), state->clear_htile_mask_ds_layout, &state->alloc);
250 }
251
252 static void
finish_meta_clear_dcc_comp_to_single_state(struct radv_device * device)253 finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
254 {
255 struct radv_meta_state *state = &device->meta_state;
256
257 for (uint32_t i = 0; i < 2; i++) {
258 radv_DestroyPipeline(radv_device_to_handle(device),
259 state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
260 }
261 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout,
262 &state->alloc);
263 device->vk.dispatch_table.DestroyDescriptorSetLayout(
264 radv_device_to_handle(device), state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
265 }
266
267 void
radv_device_finish_meta_clear_state(struct radv_device * device)268 radv_device_finish_meta_clear_state(struct radv_device *device)
269 {
270 struct radv_meta_state *state = &device->meta_state;
271
272 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
273 for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
274 for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
275 radv_DestroyPipeline(radv_device_to_handle(device),
276 state->color_clear[i][j].color_pipelines[k], &state->alloc);
277 }
278 }
279 }
280 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
281 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
282 radv_DestroyPipeline(radv_device_to_handle(device),
283 state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
284 radv_DestroyPipeline(radv_device_to_handle(device),
285 state->ds_clear[i].stencil_only_pipeline[j], &state->alloc);
286 radv_DestroyPipeline(radv_device_to_handle(device),
287 state->ds_clear[i].depthstencil_pipeline[j], &state->alloc);
288
289 radv_DestroyPipeline(radv_device_to_handle(device),
290 state->ds_clear[i].depth_only_unrestricted_pipeline[j],
291 &state->alloc);
292 radv_DestroyPipeline(radv_device_to_handle(device),
293 state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
294 &state->alloc);
295 radv_DestroyPipeline(radv_device_to_handle(device),
296 state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
297 &state->alloc);
298 }
299 }
300 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,
301 &state->alloc);
302 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,
303 &state->alloc);
304 radv_DestroyPipelineLayout(radv_device_to_handle(device),
305 state->clear_depth_unrestricted_p_layout, &state->alloc);
306
307 finish_meta_clear_htile_mask_state(device);
308 finish_meta_clear_dcc_comp_to_single_state(device);
309 }
310
311 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)312 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
313 const VkClearRect *clear_rect, uint32_t view_mask)
314 {
315 struct radv_device *device = cmd_buffer->device;
316 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
317 const uint32_t subpass_att = clear_att->colorAttachment;
318 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
319 const struct radv_image_view *iview =
320 cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
321 uint32_t samples, samples_log2;
322 VkFormat format;
323 unsigned fs_key;
324 VkClearColorValue clear_value = clear_att->clearValue.color;
325 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
326 VkPipeline pipeline;
327
328 /* When a framebuffer is bound to the current command buffer, get the
329 * number of samples from it. Otherwise, get the number of samples from
330 * the render pass because it's likely a secondary command buffer.
331 */
332 if (iview) {
333 samples = iview->image->info.samples;
334 format = iview->vk.format;
335 } else {
336 samples = cmd_buffer->state.pass->attachments[pass_att].samples;
337 format = cmd_buffer->state.pass->attachments[pass_att].format;
338 }
339
340 samples_log2 = ffs(samples) - 1;
341 fs_key = radv_format_meta_fs_key(device, format);
342 assert(fs_key != -1);
343
344 if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
345 .color_pipelines[fs_key] == VK_NULL_HANDLE) {
346 VkResult ret = create_color_pipeline(
347 device, samples, clear_att->colorAttachment, radv_fs_key_format_exemplars[fs_key],
348 &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
349 .color_pipelines[fs_key]);
350 if (ret != VK_SUCCESS) {
351 cmd_buffer->record_result = ret;
352 return;
353 }
354 }
355
356 pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment]
357 .color_pipelines[fs_key];
358
359 assert(samples_log2 < ARRAY_SIZE(device->meta_state.color_clear));
360 assert(pipeline);
361 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
362 assert(clear_att->colorAttachment < subpass->color_count);
363
364 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
365 device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
366 16, &clear_value);
367
368 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
369
370 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
371 &(VkViewport){.x = clear_rect->rect.offset.x,
372 .y = clear_rect->rect.offset.y,
373 .width = clear_rect->rect.extent.width,
374 .height = clear_rect->rect.extent.height,
375 .minDepth = 0.0f,
376 .maxDepth = 1.0f});
377
378 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
379
380 if (view_mask) {
381 u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
382 } else {
383 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
384 }
385 }
386
/* Build the VS/FS pair for draw-based depth/stencil clears.
 *
 * Restricted (core [0,1]) variant: the clear depth is loaded from push
 * constants in the VS and emitted directly as the rectangle's Z.
 * Unrestricted (VK_EXT_depth_range_unrestricted) variant: the VS emits Z = 0
 * and the FS writes the clear depth instead, loading the raw 32-bit push
 * constant through an int-typed FRAG_RESULT_DEPTH output (a bit-pattern copy,
 * which avoids any clamping of out-of-range values). */
static void
build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs,
                          struct nir_shader **out_fs, bool unrestricted)
{
   nir_builder vs_b = radv_meta_init_shader(
      dev, MESA_SHADER_VERTEX,
      unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
   nir_builder fs_b = radv_meta_init_shader(
      dev, MESA_SHADER_FRAGMENT,
      unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");

   const struct glsl_type *position_out_type = glsl_vec4_type();

   nir_variable *vs_out_pos =
      nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_ssa_def *z;
   if (unrestricted) {
      /* FS writes depth: copy the 32-bit push constant bit-for-bit. */
      nir_ssa_def *in_color_load =
         nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);

      nir_variable *fs_out_depth =
         nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
      fs_out_depth->data.location = FRAG_RESULT_DEPTH;
      nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);

      /* Rasterized Z is irrelevant; the FS output overrides it. */
      z = nir_imm_float(&vs_b, 0.0);
   } else {
      /* VS emits the clear depth directly as the rectangle's Z. */
      z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
   }

   nir_ssa_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
   nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);

   /* Per-instance layer index so one instanced draw clears several layers. */
   const struct glsl_type *layer_type = glsl_int_type();
   nir_variable *vs_out_layer =
      nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
   vs_out_layer->data.location = VARYING_SLOT_LAYER;
   vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
   nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);
   nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);

   nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
   nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);

   *out_vs = vs_b.shader;
   *out_fs = fs_b.shader;
}
436
437 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline)438 create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,
439 uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline)
440 {
441 struct nir_shader *vs_nir, *fs_nir;
442 VkResult result;
443
444 mtx_lock(&device->meta_state.mtx);
445 if (*pipeline) {
446 mtx_unlock(&device->meta_state.mtx);
447 return VK_SUCCESS;
448 }
449
450 build_depthstencil_shader(device, &vs_nir, &fs_nir, unrestricted);
451
452 const VkPipelineVertexInputStateCreateInfo vi_state = {
453 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
454 .vertexBindingDescriptionCount = 0,
455 .vertexAttributeDescriptionCount = 0,
456 };
457
458 const VkPipelineDepthStencilStateCreateInfo ds_state = {
459 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
460 .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
461 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
462 .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
463 .depthBoundsTestEnable = false,
464 .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
465 .front =
466 {
467 .passOp = VK_STENCIL_OP_REPLACE,
468 .compareOp = VK_COMPARE_OP_ALWAYS,
469 .writeMask = UINT32_MAX,
470 .reference = 0, /* dynamic */
471 },
472 .back = {0 /* dont care */},
473 .minDepthBounds = 0.0f,
474 .maxDepthBounds = 1.0f,
475 };
476
477 const VkPipelineColorBlendStateCreateInfo cb_state = {
478 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
479 .logicOpEnable = false,
480 .attachmentCount = 0,
481 .pAttachments = NULL,
482 .blendConstants = { 0.0f, 0.0f, 0.0f, 0.0f },
483 };
484
485 const VkPipelineRenderingCreateInfo rendering_create_info = {
486 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
487 .depthAttachmentFormat =
488 (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
489 .stencilAttachmentFormat =
490 (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
491 };
492
493 struct radv_graphics_pipeline_create_info extra = {
494 .use_rectlist = true,
495 };
496
497 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
498 extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
499 }
500 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
501 extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
502 }
503 result =
504 create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state,
505 &rendering_create_info, device->meta_state.clear_depth_p_layout, &extra,
506 &device->meta_state.alloc, pipeline);
507
508 mtx_unlock(&device->meta_state.mtx);
509 return result;
510 }
511
512 static bool
depth_view_can_fast_clear(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageAspectFlags aspects,VkImageLayout layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearDepthStencilValue clear_value)513 depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
514 VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
515 const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
516 {
517 if (!iview)
518 return false;
519
520 uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
521 cmd_buffer->qf);
522 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
523 clear_rect->rect.extent.width != iview->extent.width ||
524 clear_rect->rect.extent.height != iview->extent.height)
525 return false;
526 if (radv_image_is_tc_compat_htile(iview->image) &&
527 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&
528 clear_value.depth != 1.0) ||
529 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))
530 return false;
531 if (radv_htile_enabled(iview->image, iview->vk.base_mip_level) && iview->vk.base_mip_level == 0 &&
532 iview->vk.base_array_layer == 0 && iview->vk.layer_count == iview->image->info.array_size &&
533 radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,
534 queue_mask) &&
535 radv_image_extent_compare(iview->image, &iview->extent))
536 return true;
537 return false;
538 }
539
540 static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_state * meta_state,const struct radv_image_view * iview,int samples_log2,VkImageAspectFlags aspects,VkImageLayout layout,bool in_render_loop,const VkClearRect * clear_rect,VkClearDepthStencilValue clear_value)541 pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,
542 const struct radv_image_view *iview, int samples_log2,
543 VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,
544 const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)
545 {
546 bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop,
547 clear_rect, clear_value);
548 bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
549 int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
550 VkPipeline *pipeline;
551
552 switch (aspects) {
553 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
554 pipeline = unrestricted
555 ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
556 : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
557 break;
558 case VK_IMAGE_ASPECT_DEPTH_BIT:
559 pipeline = unrestricted
560 ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
561 : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
562 break;
563 case VK_IMAGE_ASPECT_STENCIL_BIT:
564 pipeline = unrestricted
565 ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
566 : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
567 break;
568 default:
569 unreachable("expected depth or stencil aspect");
570 }
571
572 if (*pipeline == VK_NULL_HANDLE) {
573 VkResult ret = create_depthstencil_pipeline(
574 cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
575 if (ret != VK_SUCCESS) {
576 cmd_buffer->record_result = ret;
577 return VK_NULL_HANDLE;
578 }
579 }
580 return *pipeline;
581 }
582
/* Record a draw-based depth/stencil clear over the given rect/layers.
 * Saves and restores the caller's front stencil reference, and (for
 * depth/stencil resolve clears) temporarily switches to a minimal subpass
 * containing only the target attachment. NOTE(review): push constants are
 * recorded before the pipeline is picked; on pipeline-creation failure the
 * pushed constants are harmless since no draw is emitted. */
static void
emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
                        const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att,
                        uint32_t view_mask, bool ds_resolve_clear)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *meta_state = &device->meta_state;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   const uint32_t pass_att = ds_att->attachment;
   VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
   VkImageAspectFlags aspects = clear_att->aspectMask;
   const struct radv_image_view *iview =
      cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;
   uint32_t samples, samples_log2;
   VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);

   /* When a framebuffer is bound to the current command buffer, get the
    * number of samples from it. Otherwise, get the number of samples from
    * the render pass because it's likely a secondary command buffer.
    */
   if (iview) {
      samples = iview->image->info.samples;
   } else {
      samples = cmd_buffer->state.pass->attachments[pass_att].samples;
   }

   samples_log2 = ffs(samples) - 1;

   assert(pass_att != VK_ATTACHMENT_UNUSED);

   /* Stencil-only clears still push a depth value; default it to 1.0
    * (depth writes are disabled in that pipeline, so it is never stored). */
   if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
      clear_value.depth = 1.0f;

   /* The unrestricted variant reads the depth in the FS, the restricted one
    * in the VS — push to whichever stage/layout the shaders expect. */
   if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.clear_depth_unrestricted_p_layout,
                            VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);
   } else {
      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,
                            4, &clear_value.depth);
   }

   /* Save the application's stencil reference; it is restored after the
    * clear draw below. */
   uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
   }

   VkPipeline pipeline =
      pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,
                                 ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value);
   if (!pipeline)
      return;

   /* Minimal subpass used for depth/stencil resolve clears: only the target
    * attachment, no color. */
   struct radv_subpass clear_subpass = {
      .color_count = 0,
      .color_attachments = NULL,
      .depth_stencil_attachment = ds_att,
   };

   if (ds_resolve_clear)
      radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);

   radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);

   /* Fast-clear eligible: record the clear value in the image's metadata. */
   if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop,
                                 clear_rect, clear_value))
      radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);

   radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                       &(VkViewport){.x = clear_rect->rect.offset.x,
                                     .y = clear_rect->rect.offset.y,
                                     .width = clear_rect->rect.extent.width,
                                     .height = clear_rect->rect.extent.height,
                                     .minDepth = 0.0f,
                                     .maxDepth = 1.0f});

   radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);

   /* Multiview: one draw per active view; otherwise one instanced draw
    * covers the requested layer range. */
   if (view_mask) {
      u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
   } else {
      radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
   }

   /* Restore the application's stencil reference. */
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
   }

   if (ds_resolve_clear)
      radv_cmd_buffer_restore_subpass(cmd_buffer, subpass);
}
675
/* Clear selected bits of an HTILE buffer range with a compute shader:
 * each HTILE dword becomes (dword & ~htile_mask) | (htile_value & htile_mask),
 * i.e. only the bits covered by 'htile_mask' are overwritten. Returns the
 * cache-flush flags the caller must apply after the dispatch. */
static uint32_t
clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
                 struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,
                 uint32_t htile_mask)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   /* One workgroup per 1024 bytes of HTILE (assumes the shader's workgroup
    * processes 1024 bytes — TODO confirm against the pipeline's shader). */
   uint64_t block_count = round_up_u64(size, 1024);
   struct radv_meta_saved_state saved_state;
   struct radv_buffer dst_buffer;

   /* Save the application's compute state around the meta dispatch. */
   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   /* Wrap the HTILE range in a temporary buffer object. */
   radv_buffer_init(&dst_buffer, device, bo, size, offset);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        state->clear_htile_mask_pipeline);

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
      1, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
          .pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),
                                                   .offset = 0,
                                                   .range = size}}});

   /* constants[0]: bits to OR in (pre-masked clear value);
    * constants[1]: bits to preserve from the existing dword. */
   const unsigned constants[2] = {
      htile_value & htile_mask,
      ~htile_mask,
   };

   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
                         VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);

   radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);

   radv_buffer_finish(&dst_buffer);

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
          radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
726
727 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device * device,const struct radv_image * image,VkClearDepthStencilValue value)728 radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
729 VkClearDepthStencilValue value)
730 {
731 uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
732 uint32_t zmask = 0, smem = 0;
733 uint32_t htile_value;
734 uint32_t zmin, zmax;
735
736 /* Convert the depth value to 14-bit zmin/zmax values. */
737 zmin = lroundf(value.depth * max_zval);
738 zmax = zmin;
739
740 if (radv_image_tile_stencil_disabled(device, image)) {
741 /* Z only (no stencil):
742 *
743 * |31 18|17 4|3 0|
744 * +---------+---------+-------+
745 * | Max Z | Min Z | ZMask |
746 */
747 htile_value = (((zmax & 0x3fff) << 18) |
748 ((zmin & 0x3fff) << 4) |
749 ((zmask & 0xf) << 0));
750 } else {
751
752 /* Z and stencil:
753 *
754 * |31 12|11 10|9 8|7 6|5 4|3 0|
755 * +-----------+-----+------+-----+-----+-------+
756 * | Z Range | | SMem | SR1 | SR0 | ZMask |
757 *
758 * Z, stencil, 4 bit VRS encoding:
759 * |31 12| 11 10 |9 8|7 6 |5 4|3 0|
760 * +-----------+------------+------+------------+-----+-------+
761 * | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
762 */
763 uint32_t delta = 0;
764 uint32_t zrange = ((zmax << 6) | delta);
765 uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
766
767 if (radv_image_has_vrs_htile(device, image))
768 sresults = 0x3;
769
770 htile_value = (((zrange & 0xfffff) << 12) |
771 ((smem & 0x3) << 8) |
772 ((sresults & 0xf) << 4) |
773 ((zmask & 0xf) << 0));
774 }
775
776 return htile_value;
777 }
778
779 static uint32_t
radv_get_htile_mask(const struct radv_device * device,const struct radv_image * image,VkImageAspectFlags aspects)780 radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,
781 VkImageAspectFlags aspects)
782 {
783 uint32_t mask = 0;
784
785 if (radv_image_tile_stencil_disabled(device, image)) {
786 /* All the HTILE buffer is used when there is no stencil. */
787 mask = UINT32_MAX;
788 } else {
789 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
790 mask |= 0xfffffc0f;
791 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
792 mask |= 0x000003f0;
793 }
794
795 return mask;
796 }
797
798 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)799 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
800 {
801 return value.depth == 1.0f || value.depth == 0.0f;
802 }
803
804 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)805 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
806 {
807 return value.stencil == 0;
808 }
809
810 static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,bool in_render_loop,VkImageAspectFlags aspects,const VkClearRect * clear_rect,const VkClearDepthStencilValue clear_value,uint32_t view_mask)811 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
812 VkImageLayout image_layout, bool in_render_loop,
813 VkImageAspectFlags aspects, const VkClearRect *clear_rect,
814 const VkClearDepthStencilValue clear_value, uint32_t view_mask)
815 {
816 if (!iview || !iview->support_fast_clear)
817 return false;
818
819 if (!radv_layout_is_htile_compressed(
820 cmd_buffer->device, iview->image, image_layout, in_render_loop,
821 radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
822 cmd_buffer->qf)))
823 return false;
824
825 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
826 clear_rect->rect.extent.width != iview->image->info.width ||
827 clear_rect->rect.extent.height != iview->image->info.height)
828 return false;
829
830 if (view_mask && (iview->image->info.array_size >= 32 ||
831 (1u << iview->image->info.array_size) - 1u != view_mask))
832 return false;
833 if (!view_mask && clear_rect->baseArrayLayer != 0)
834 return false;
835 if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
836 return false;
837
838 if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted &&
839 (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
840 (clear_value.depth < 0.0 || clear_value.depth > 1.0))
841 return false;
842
843 if (radv_image_is_tc_compat_htile(iview->image) &&
844 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
845 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
846 !radv_is_fast_clear_stencil_allowed(clear_value))))
847 return false;
848
849 if (iview->image->info.levels > 1) {
850 uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
851 if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
852 /* Do not fast clears if one level can't be fast cleared. */
853 return false;
854 }
855 }
856
857 return true;
858 }
859
/* Fast clear the depth/stencil aspects of 'iview' by writing the packed
 * fast-clear value directly into the image's HTILE buffer.
 *
 * 'pre_flush'/'post_flush', when non-NULL, accumulate the cache-flush bits
 * already issued before / still required after the clear, so callers can
 * batch several clears without emitting redundant flushes.
 */
static void
radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
                      enum radv_cmd_flush_bits *post_flush)
{
   VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
   VkImageAspectFlags aspects = clear_att->aspectMask;
   uint32_t clear_word, flush_bits;

   /* Pack the clear value into the HTILE encoding. */
   clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);

   if (pre_flush) {
      /* Flush prior attachment writes and invalidate shader caches before
       * rewriting HTILE, but only add bits the caller hasn't flushed yet.
       */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                               iview->image) |
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT |
                               VK_ACCESS_2_SHADER_READ_BIT, iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   /* Clear the mip levels / layers selected by the image view. */
   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);

   if (iview->image->planes[0].surface.has_stencil &&
       !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      /* Synchronize after performing a depth-only or a stencil-only
       * fast clear because the driver uses an optimized path which
       * performs a read-modify-write operation, and the two separate
       * aspects might use the same HTILE memory.
       */
      cmd_buffer->state.flush_bits |= flush_bits;
   }

   /* Record the clear value in the image metadata for TC-compat reads. */
   radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
   if (post_flush) {
      *post_flush |= flush_bits;
   }
}
906
907 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)908 build_clear_htile_mask_shader(struct radv_device *dev)
909 {
910 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
911 b.shader->info.workgroup_size[0] = 64;
912
913 nir_ssa_def *global_id = get_global_ids(&b, 1);
914
915 nir_ssa_def *offset = nir_imul_imm(&b, global_id, 16);
916 offset = nir_channel(&b, offset, 0);
917
918 nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);
919
920 nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
921
922 nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
923
924 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
925 nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
926 data = nir_ior(&b, data, nir_channel(&b, constants, 0));
927
928 nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
929
930 return b.shader;
931 }
932
933 static VkResult
init_meta_clear_htile_mask_state(struct radv_device * device)934 init_meta_clear_htile_mask_state(struct radv_device *device)
935 {
936 struct radv_meta_state *state = &device->meta_state;
937 VkResult result;
938 nir_shader *cs = build_clear_htile_mask_shader(device);
939
940 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
941 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
942 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
943 .bindingCount = 1,
944 .pBindings = (VkDescriptorSetLayoutBinding[]){
945 {.binding = 0,
946 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
947 .descriptorCount = 1,
948 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
949 .pImmutableSamplers = NULL},
950 }};
951
952 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
953 &state->alloc, &state->clear_htile_mask_ds_layout);
954 if (result != VK_SUCCESS)
955 goto fail;
956
957 VkPipelineLayoutCreateInfo p_layout_info = {
958 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
959 .setLayoutCount = 1,
960 .pSetLayouts = &state->clear_htile_mask_ds_layout,
961 .pushConstantRangeCount = 1,
962 .pPushConstantRanges =
963 &(VkPushConstantRange){
964 VK_SHADER_STAGE_COMPUTE_BIT,
965 0,
966 8,
967 },
968 };
969
970 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
971 &state->clear_htile_mask_p_layout);
972 if (result != VK_SUCCESS)
973 goto fail;
974
975 VkPipelineShaderStageCreateInfo shader_stage = {
976 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
977 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
978 .module = vk_shader_module_handle_from_nir(cs),
979 .pName = "main",
980 .pSpecializationInfo = NULL,
981 };
982
983 VkComputePipelineCreateInfo pipeline_info = {
984 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
985 .stage = shader_stage,
986 .flags = 0,
987 .layout = state->clear_htile_mask_p_layout,
988 };
989
990 result = radv_CreateComputePipelines(radv_device_to_handle(device),
991 radv_pipeline_cache_to_handle(&state->cache), 1,
992 &pipeline_info, NULL, &state->clear_htile_mask_pipeline);
993
994 fail:
995 ralloc_free(cs);
996 return result;
997 }
998
999 /* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
1000 * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
1001 */
1002 static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device * dev,bool is_msaa)1003 build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
1004 {
1005 enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
1006 const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
1007
1008 nir_builder b =
1009 radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
1010 is_msaa ? "multisampled" : "singlesampled");
1011 b.shader->info.workgroup_size[0] = 8;
1012 b.shader->info.workgroup_size[1] = 8;
1013
1014 nir_ssa_def *global_id = get_global_ids(&b, 3);
1015
1016 /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
1017 nir_ssa_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
1018
1019 /* Compute the coordinates. */
1020 nir_ssa_def *coord = nir_channels(&b, global_id, 0x3);
1021 coord = nir_imul(&b, coord, dcc_block_size);
1022 coord = nir_vec4(&b, nir_channel(&b, coord, 0),
1023 nir_channel(&b, coord, 1),
1024 nir_channel(&b, global_id, 2),
1025 nir_ssa_undef(&b, 1, 32));
1026
1027 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
1028 output_img->data.descriptor_set = 0;
1029 output_img->data.binding = 0;
1030
1031 /* Load the clear color values. */
1032 nir_ssa_def *clear_values = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
1033
1034 nir_ssa_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0),
1035 nir_channel(&b, clear_values, 1),
1036 nir_channel(&b, clear_values, 1),
1037 nir_channel(&b, clear_values, 1));
1038
1039 /* Store the clear color values. */
1040 nir_ssa_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_ssa_undef(&b, 1, 32);
1041 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
1042 sample_id, data, nir_imm_int(&b, 0),
1043 .image_dim = dim, .image_array = true);
1044
1045 return b.shader;
1046 }
1047
1048 static VkResult
create_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline)1049 create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline)
1050 {
1051 struct radv_meta_state *state = &device->meta_state;
1052 VkResult result;
1053 nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);
1054
1055 VkPipelineShaderStageCreateInfo shader_stage = {
1056 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1057 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1058 .module = vk_shader_module_handle_from_nir(cs),
1059 .pName = "main",
1060 .pSpecializationInfo = NULL,
1061 };
1062
1063 VkComputePipelineCreateInfo pipeline_info = {
1064 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1065 .stage = shader_stage,
1066 .flags = 0,
1067 .layout = state->clear_dcc_comp_to_single_p_layout,
1068 };
1069
1070 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1071 radv_pipeline_cache_to_handle(&state->cache), 1,
1072 &pipeline_info, NULL, pipeline);
1073
1074 ralloc_free(cs);
1075 return result;
1076 }
1077
1078 static VkResult
init_meta_clear_dcc_comp_to_single_state(struct radv_device * device)1079 init_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
1080 {
1081 struct radv_meta_state *state = &device->meta_state;
1082 VkResult result;
1083
1084 VkDescriptorSetLayoutCreateInfo ds_layout_info = {
1085 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1086 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1087 .bindingCount = 1,
1088 .pBindings = (VkDescriptorSetLayoutBinding[]){
1089 {.binding = 0,
1090 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1091 .descriptorCount = 1,
1092 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1093 .pImmutableSamplers = NULL},
1094 }};
1095
1096 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,
1097 &state->alloc, &state->clear_dcc_comp_to_single_ds_layout);
1098 if (result != VK_SUCCESS)
1099 goto fail;
1100
1101 VkPipelineLayoutCreateInfo p_layout_info = {
1102 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1103 .setLayoutCount = 1,
1104 .pSetLayouts = &state->clear_dcc_comp_to_single_ds_layout,
1105 .pushConstantRangeCount = 1,
1106 .pPushConstantRanges =
1107 &(VkPushConstantRange){
1108 VK_SHADER_STAGE_COMPUTE_BIT,
1109 0,
1110 16,
1111 },
1112 };
1113
1114 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
1115 &state->clear_dcc_comp_to_single_p_layout);
1116 if (result != VK_SUCCESS)
1117 goto fail;
1118
1119 for (uint32_t i = 0; i < 2; i++) {
1120 result = create_dcc_comp_to_single_pipeline(device, !!i,
1121 &state->clear_dcc_comp_to_single_pipeline[i]);
1122 if (result != VK_SUCCESS)
1123 goto fail;
1124 }
1125
1126 fail:
1127 return result;
1128 }
1129
1130 VkResult
radv_device_init_meta_clear_state(struct radv_device * device,bool on_demand)1131 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1132 {
1133 VkResult res;
1134 struct radv_meta_state *state = &device->meta_state;
1135
1136 VkPipelineLayoutCreateInfo pl_color_create_info = {
1137 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1138 .setLayoutCount = 0,
1139 .pushConstantRangeCount = 1,
1140 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1141 };
1142
1143 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,
1144 &device->meta_state.alloc,
1145 &device->meta_state.clear_color_p_layout);
1146 if (res != VK_SUCCESS)
1147 return res;
1148
1149 VkPipelineLayoutCreateInfo pl_depth_create_info = {
1150 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1151 .setLayoutCount = 0,
1152 .pushConstantRangeCount = 1,
1153 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1154 };
1155
1156 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,
1157 &device->meta_state.alloc,
1158 &device->meta_state.clear_depth_p_layout);
1159 if (res != VK_SUCCESS)
1160 return res;
1161
1162 VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
1163 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1164 .setLayoutCount = 0,
1165 .pushConstantRangeCount = 1,
1166 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
1167 };
1168
1169 res = radv_CreatePipelineLayout(radv_device_to_handle(device),
1170 &pl_depth_unrestricted_create_info, &device->meta_state.alloc,
1171 &device->meta_state.clear_depth_unrestricted_p_layout);
1172 if (res != VK_SUCCESS)
1173 return res;
1174
1175 res = init_meta_clear_htile_mask_state(device);
1176 if (res != VK_SUCCESS)
1177 return res;
1178
1179 res = init_meta_clear_dcc_comp_to_single_state(device);
1180 if (res != VK_SUCCESS)
1181 return res;
1182
1183 if (on_demand)
1184 return VK_SUCCESS;
1185
1186 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
1187 uint32_t samples = 1 << i;
1188
1189 /* Only precompile meta pipelines for attachment 0 as other are uncommon. */
1190 for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1191 VkFormat format = radv_fs_key_format_exemplars[j];
1192 unsigned fs_key = radv_format_meta_fs_key(device, format);
1193 assert(!state->color_clear[i][0].color_pipelines[fs_key]);
1194
1195 res = create_color_pipeline(device, samples, 0, format,
1196 &state->color_clear[i][0].color_pipelines[fs_key]);
1197 if (res != VK_SUCCESS)
1198 return res;
1199 }
1200 }
1201 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
1202 uint32_t samples = 1 << i;
1203
1204 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1205 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
1206 &state->ds_clear[i].depth_only_pipeline[j]);
1207 if (res != VK_SUCCESS)
1208 return res;
1209
1210 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1211 &state->ds_clear[i].stencil_only_pipeline[j]);
1212 if (res != VK_SUCCESS)
1213 return res;
1214
1215 res = create_depthstencil_pipeline(
1216 device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1217 &state->ds_clear[i].depthstencil_pipeline[j]);
1218 if (res != VK_SUCCESS)
1219 return res;
1220
1221 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
1222 &state->ds_clear[i].depth_only_unrestricted_pipeline[j]);
1223 if (res != VK_SUCCESS)
1224 return res;
1225
1226 res =
1227 create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1228 &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
1229 if (res != VK_SUCCESS)
1230 return res;
1231
1232 res = create_depthstencil_pipeline(
1233 device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1234 &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
1235 if (res != VK_SUCCESS)
1236 return res;
1237 }
1238 }
1239 return VK_SUCCESS;
1240 }
1241
1242 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1243 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1244 {
1245 uint32_t value = 0; /* Default value when no DCC. */
1246
1247 /* The fast-clear value is different for images that have both DCC and
1248 * CMASK metadata.
1249 */
1250 if (radv_image_has_dcc(image)) {
1251 /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1252 return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;
1253 }
1254
1255 return value;
1256 }
1257
1258 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1259 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1260 const VkImageSubresourceRange *range, uint32_t value)
1261 {
1262 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1263 uint64_t size;
1264
1265 if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1266 /* TODO: clear layers. */
1267 size = image->planes[0].surface.cmask_size;
1268 } else {
1269 unsigned slice_size = image->planes[0].surface.cmask_slice_size;
1270
1271 offset += slice_size * range->baseArrayLayer;
1272 size = slice_size * radv_get_layerCount(image, range);
1273 }
1274
1275 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1276 radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1277 }
1278
1279 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1280 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1281 const VkImageSubresourceRange *range, uint32_t value)
1282 {
1283 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.fmask_offset;
1284 unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1285 uint64_t size;
1286
1287 /* MSAA images do not support mipmap levels. */
1288 assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1);
1289
1290 offset += slice_size * range->baseArrayLayer;
1291 size = slice_size * radv_get_layerCount(image, range);
1292
1293 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1294 radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1295 }
1296
1297 uint32_t
radv_clear_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1298 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1299 const VkImageSubresourceRange *range, uint32_t value)
1300 {
1301 uint32_t level_count = radv_get_levelCount(image, range);
1302 uint32_t layer_count = radv_get_layerCount(image, range);
1303 uint32_t flush_bits = 0;
1304
1305 /* Mark the image as being compressed. */
1306 radv_update_dcc_metadata(cmd_buffer, image, range, true);
1307
1308 for (uint32_t l = 0; l < level_count; l++) {
1309 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset;
1310 uint32_t level = range->baseMipLevel + l;
1311 uint64_t size;
1312
1313 if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
1314 /* DCC for mipmaps+layers is currently disabled. */
1315 offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
1316 image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1317 size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
1318 } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1319 /* Mipmap levels and layers aren't implemented. */
1320 assert(level == 0);
1321 size = image->planes[0].surface.meta_size;
1322 } else {
1323 const struct legacy_surf_dcc_level *dcc_level =
1324 &image->planes[0].surface.u.legacy.color.dcc_level[level];
1325
1326 /* If dcc_fast_clear_size is 0 (which might happens for
1327 * mipmaps) the fill buffer operation below is a no-op.
1328 * This can only happen during initialization as the
1329 * fast clear path fallbacks to slow clears if one
1330 * level can't be fast cleared.
1331 */
1332 offset +=
1333 dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
1334 size = dcc_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);
1335 }
1336
1337 /* Do not clear this level if it can't be compressed. */
1338 if (!size)
1339 continue;
1340
1341 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1342 radv_buffer_get_va(image->bindings[0].bo) + offset,
1343 size, value);
1344 }
1345
1346 return flush_bits;
1347 }
1348
/* Clear DCC using comp-to-single: dispatch a compute shader that stores the
 * clear color at the start of every DCC block, for each selected mip level.
 * 'color_values' holds the two 32-bit words of the packed clear color.
 * Returns the cache-flush bits required after the clears.
 */
static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer,
                              struct radv_image *image,
                              const VkImageSubresourceRange *range,
                              uint32_t color_values[2])
{
   struct radv_device *device = cmd_buffer->device;
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
   unsigned layer_count = radv_get_layerCount(image, range);
   struct radv_meta_saved_state saved_state;
   bool is_msaa = image->info.samples > 1;
   struct radv_image_view iview;
   VkFormat format;

   /* Pick a single/dual/quad-channel UINT view format matching the pixel size
    * so the shader can store the raw clear words.
    */
   switch (bytes_per_pixel) {
   case 1:
      format = VK_FORMAT_R8_UINT;
      break;
   case 2:
      format = VK_FORMAT_R16_UINT;
      break;
   case 4:
      format = VK_FORMAT_R32_UINT;
      break;
   case 8:
      format = VK_FORMAT_R32G32_UINT;
      break;
   case 16:
      format = VK_FORMAT_R32G32B32A32_UINT;
      break;
   default:
      unreachable("Unsupported number of bytes per pixel");
   }

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);

   VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa];

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, range); l++) {
      uint32_t width, height;

      /* Do not write the clear color value for levels without DCC. */
      if (!radv_dcc_enabled(image, range->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, range->baseMipLevel + l);
      height = radv_minify(image->info.height, range->baseMipLevel + l);

      /* Per-level view; compression disabled so stores bypass DCC. */
      radv_image_view_init(
         &iview, cmd_buffer->device,
         &(VkImageViewCreateInfo){
            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
            .image = radv_image_to_handle(image),
            .viewType = VK_IMAGE_VIEW_TYPE_2D,
            .format = format,
            .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                 .baseMipLevel = range->baseMipLevel + l,
                                 .levelCount = 1,
                                 .baseArrayLayer = range->baseArrayLayer,
                                 .layerCount = layer_count},
         },
         0, &(struct radv_image_view_extra_create_info){.disable_compression = true});

      radv_meta_push_descriptor_set(
         cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
         device->meta_state.clear_dcc_comp_to_single_p_layout, 0,
         1,
         (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                   .dstBinding = 0,
                                   .dstArrayElement = 0,
                                   .descriptorCount = 1,
                                   .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                   .pImageInfo =
                                      (VkDescriptorImageInfo[]){
                                         {
                                            .sampler = VK_NULL_HANDLE,
                                            .imageView = radv_image_view_to_handle(&iview),
                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                         },
                                      }}});

      /* One shader invocation per DCC block. */
      unsigned dcc_width =
         DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
      unsigned dcc_height =
         DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);

      /* Push constants: DCC block dimensions + the two clear color words. */
      const unsigned constants[4] = {
         image->planes[0].surface.u.gfx9.color.dcc_block_width,
         image->planes[0].surface.u.gfx9.color.dcc_block_height,
         color_values[0],
         color_values[1],
      };

      radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                            device->meta_state.clear_dcc_comp_to_single_p_layout,
                            VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants);

      radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);

      radv_image_view_finish(&iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
          radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
}
1461
1462 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1463 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1464 const VkImageSubresourceRange *range, uint32_t value)
1465 {
1466 uint32_t level_count = radv_get_levelCount(image, range);
1467 uint32_t flush_bits = 0;
1468 uint32_t htile_mask;
1469
1470 htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
1471
1472 if (level_count != image->info.levels) {
1473 assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
1474
1475 /* Clear individuals levels separately. */
1476 for (uint32_t l = 0; l < level_count; l++) {
1477 uint32_t level = range->baseMipLevel + l;
1478 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1479 image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1480 uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;
1481
1482 /* Do not clear this level if it can be compressed. */
1483 if (!size)
1484 continue;
1485
1486 if (htile_mask == UINT_MAX) {
1487 /* Clear the whole HTILE buffer. */
1488 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1489 radv_buffer_get_va(image->bindings[0].bo) + offset,
1490 size, value);
1491 } else {
1492 /* Only clear depth or stencil bytes in the HTILE buffer. */
1493 flush_bits |=
1494 clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1495 }
1496 }
1497 } else {
1498 unsigned layer_count = radv_get_layerCount(image, range);
1499 uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
1500 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1501 image->planes[0].surface.meta_slice_size * range->baseArrayLayer;
1502
1503 if (htile_mask == UINT_MAX) {
1504 /* Clear the whole HTILE buffer. */
1505 flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1506 radv_buffer_get_va(image->bindings[0].bo) + offset,
1507 size, value);
1508 } else {
1509 /* Only clear depth or stencil bytes in the HTILE buffer. */
1510 flush_bits =
1511 clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1512 }
1513 }
1514
1515 return flush_bits;
1516 }
1517
/* DCC fast-clear codes written into the DCC metadata buffer. The GFX8 codes
 * presumably encode which channels clear to 0/1 (names: 0001 = RGB 0 / alpha 1,
 * etc. — per-generation encodings differ); CLEAR_REG is used when the clear
 * color must come from the CB clear registers and a fast-clear eliminate is
 * required (see gfx8_get_fast_clear_parameters). The *_CLEAR_SINGLE codes are
 * used for comp-to-single fast clears.
 */
enum {
   RADV_DCC_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
   RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
   RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
   RADV_DCC_GFX8_CLEAR_REG = 0x20202020U,
   RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
   RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
   RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
   RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
   RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
   RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
   RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
};
1533
1534 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1535 radv_dcc_single_clear_value(const struct radv_device *device)
1536 {
1537 return device->physical_device->rad_info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE
1538 : RADV_DCC_GFX9_CLEAR_SINGLE;
1539 }
1540
/* Compute the GFX8-style DCC fast-clear code for 'clear_value' on 'iview'.
 * Writes the DCC code to '*reset_value' and sets '*can_avoid_fast_clear_elim'
 * when the code fully encodes the clear color, so no fast-clear-eliminate
 * pass is needed. Falls back to RADV_DCC_GFX8_CLEAR_REG (FCE required) or the
 * comp-to-single code when the color isn't one of the encodable 0/1 patterns.
 */
static void
gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                               const VkClearColorValue *clear_value, uint32_t *reset_value,
                               bool *can_avoid_fast_clear_elim)
{
   bool values[4] = {0};          /* per-channel: does this channel clear to 1? */
   int extra_channel;             /* index of the "extra" (alpha-like) channel, -1 if none */
   bool main_value = false;       /* shared 0/1 value of the color channels */
   bool extra_value = false;      /* 0/1 value of the extra channel */
   bool has_color = false;
   bool has_alpha = false;

   /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
      *can_avoid_fast_clear_elim = true;
   } else {
      *reset_value = RADV_DCC_GFX8_CLEAR_REG;
      *can_avoid_fast_clear_elim = false;
   }

   /* Decide which channel (if any) plays the "alpha" role for the encoding. */
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
       iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 || iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
      extra_channel = -1;
   else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      if (vi_alpha_is_on_msb(device, iview->vk.format))
         extra_channel = desc->nr_channels - 1;
      else
         extra_channel = 0;
   } else
      return;

   /* Classify every channel as clearing to 0 or 1; bail out if a channel
    * clears to any other value (not encodable in a DCC clear code).
    */
   for (int i = 0; i < 4; i++) {
      int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
      if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
         continue;

      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
         /* Use the maximum value for clamping the clear color. */
         int max = u_bit_consecutive(0, desc->channel[i].size - 1);

         values[i] = clear_value->int32[i] != 0;
         if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
            return;
      } else if (desc->channel[i].pure_integer &&
                 desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         /* Use the maximum value for clamping the clear color. */
         unsigned max = u_bit_consecutive(0, desc->channel[i].size);

         values[i] = clear_value->uint32[i] != 0U;
         if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
            return;
      } else {
         values[i] = clear_value->float32[i] != 0.0F;
         if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
            return;
      }

      if (index == extra_channel) {
         extra_value = values[i];
         has_alpha = true;
      } else {
         main_value = values[i];
         has_color = true;
      }
   }

   /* If alpha isn't present, make it the same as color, and vice versa. */
   if (!has_alpha)
      extra_value = main_value;
   else if (!has_color)
      main_value = extra_value;

   /* All color channels must agree on a single 0/1 value. */
   for (int i = 0; i < 4; ++i)
      if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
          desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
         return;

   /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
   if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
      return;

   *can_avoid_fast_clear_elim = true;

   /* Pick the clear code for the (color, alpha) 0/1 combination. */
   if (main_value) {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_1111;
      else
         *reset_value = RADV_DCC_GFX8_CLEAR_1110;
   } else {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_0001;
      else
         *reset_value = RADV_DCC_CLEAR_0000;
   }
}
1638
1639 static bool
gfx11_get_fast_clear_parameters(struct radv_device * device,const struct radv_image_view * iview,const VkClearColorValue * clear_value,uint32_t * reset_value)1640 gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
1641 const VkClearColorValue *clear_value, uint32_t *reset_value)
1642 {
1643 int extra_channel;
1644
1645 bool all_bits_are_0 = true;
1646 bool all_bits_are_1 = true;
1647 bool all_words_are_fp16_1 = true;
1648 bool all_words_are_fp32_1 = true;
1649 bool unorm_0001 = true;
1650 bool unorm_1110 = true;
1651
1652 const struct util_format_description *desc = vk_format_description(iview->vk.format);
1653 if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||
1654 iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
1655 iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1656 extra_channel = -1;
1657 else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1658 if (vi_alpha_is_on_msb(device, iview->vk.format))
1659 extra_channel = desc->nr_channels - 1;
1660 else
1661 extra_channel = 0;
1662 } else
1663 return false;
1664
1665 for (int i = 0; i < 4; i++) {
1666 int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
1667 if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
1668 continue;
1669
1670 uint32_t extra_xor = index == extra_channel ? ~0u : 0;
1671 if (clear_value->uint32[i] & ((1u << desc->channel[i].size) - 1))
1672 all_bits_are_0 = false;
1673 if (~clear_value->uint32[i] & ((1u << desc->channel[i].size) - 1))
1674 all_bits_are_1 = false;
1675 if (desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT || desc->channel[i].size != 16 ||
1676 clear_value->float32[i] != 1.0)
1677 all_words_are_fp16_1 = false;
1678 if (desc->channel[i].type != UTIL_FORMAT_TYPE_FLOAT || desc->channel[i].size != 32 ||
1679 clear_value->float32[i] != 1.0)
1680 all_words_are_fp32_1 = false;
1681 if ((clear_value->uint32[i] ^ extra_xor) & ((1u << desc->channel[i].size) - 1))
1682 unorm_0001 = false;
1683 if ((~clear_value->uint32[i] ^ extra_xor) & ((1u << desc->channel[i].size) - 1))
1684 unorm_1110 = false;
1685 }
1686
1687 if (all_bits_are_0)
1688 *reset_value = RADV_DCC_CLEAR_0000;
1689 else if (all_bits_are_1)
1690 *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
1691 else if (all_words_are_fp16_1)
1692 *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
1693 else if (all_words_are_fp32_1)
1694 *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
1695 else if (unorm_0001)
1696 *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
1697 else if (unorm_1110)
1698 *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
1699 else if (iview->image->support_comp_to_single)
1700 *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
1701 else
1702 return false;
1703
1704 return true;
1705 }
1706
/**
 * Check whether a color clear can be performed with the fast path
 * (CMASK and/or DCC metadata clear) instead of a draw.
 *
 * Returns false when any precondition fails: no fast-clearable view, the
 * layout disallows fast clears, the rect doesn't cover the whole image,
 * the view mask doesn't cover all layers, the clear color can't be packed
 * for the metadata, or a mip level lacks DCC fast-clear support.
 */
static bool
radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, bool in_render_loop,
                          const VkClearRect *clear_rect, VkClearColorValue clear_value,
                          uint32_t view_mask)
{
   uint32_t clear_color[2];

   if (!iview || !iview->support_fast_clear)
      return false;

   /* The image layout must allow fast clears for the queues that can access it. */
   if (!radv_layout_can_fast_clear(
          cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout, in_render_loop,
          radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
                                       cmd_buffer->qf)))
      return false;

   /* Fast clears only work when the full image extent is cleared. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->info.width ||
       clear_rect->rect.extent.height != iview->image->info.height)
      return false;

   /* With multiview, the view mask must cover every array layer; without
    * multiview, the rect must span all layers starting at 0.
    */
   if (view_mask && (iview->image->info.array_size >= 32 ||
                     (1u << iview->image->info.array_size) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)
      return false;

   /* DCC */
   if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
      return false;

   /* Images that support comp-to-single clears don't have clear values. */
   if (!iview->image->support_comp_to_single &&
       !radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
      return false;

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      bool can_avoid_fast_clear_elim;
      uint32_t reset_value;

      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
         /* GFX11 only supports a fixed set of DCC clear codes. */
         if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value,
                                              &reset_value))
            return false;
      } else {
         gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
                                        &can_avoid_fast_clear_elim);
      }

      if (iview->image->info.levels > 1) {
         if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
            uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
            if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
               /* Do not fast clear if one level can't be fast cleared. */
               return false;
            }
         } else {
            for (uint32_t l = 0; l < iview->vk.level_count; l++) {
               uint32_t level = iview->vk.base_mip_level + l;
               struct legacy_surf_dcc_level *dcc_level =
                  &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];

               /* Do not fast clear if one level can't be fast cleared. */
               if (!dcc_level->dcc_fast_clear_size)
                  return false;
            }
         }
      }
   }

   return true;
}
1784
/**
 * Perform a fast color clear by initializing the CMASK/DCC metadata
 * (and the comp-to-single clear words when applicable) instead of drawing.
 *
 * Callers must have validated the clear with radv_can_fast_clear_color().
 * pre_flush/post_flush (either may be NULL) accumulate cache-flush bits
 * across consecutive clears so redundant flushes are skipped.
 */
static void
radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      const VkClearAttachment *clear_att, uint32_t subpass_att,
                      enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
   VkClearColorValue clear_value = clear_att->clearValue.color;
   uint32_t clear_color[2], flush_bits = 0;
   uint32_t cmask_clear_value;
   VkImageSubresourceRange range = {
      .aspectMask = iview->vk.aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   if (pre_flush) {
      /* Flush pending attachment/shader writes before touching the
       * metadata, but only the bits not already flushed for an earlier
       * clear in this batch.
       */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, iview->image);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   /* DCC */
   radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);

   cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);

   /* clear cmask buffer */
   bool need_decompress_pass = false;
   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      uint32_t reset_value;
      bool can_avoid_fast_clear_elim = true;

      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
         /* The caller already verified this clear value is supported on GFX11. */
         ASSERTED bool result =
            gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
         assert(result);
      } else {
         gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
                                        &can_avoid_fast_clear_elim);
      }

      if (radv_image_has_cmask(iview->image)) {
         flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
      }

      /* When the clear code requires a fast-clear eliminate, record that a
       * decompress pass is needed before non-DCC-aware access.
       */
      if (!can_avoid_fast_clear_elim)
         need_decompress_pass = true;

      flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);

      if (reset_value == radv_dcc_single_clear_value(cmd_buffer->device)) {
         /* Write the clear color to the first byte of each 256B block when the image supports DCC
          * fast clears with comp-to-single.
          */
         flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
      }
   } else {
      flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);

      /* Fast clearing with CMASK should always be eliminated. */
      need_decompress_pass = true;
   }

   if (post_flush) {
      *post_flush |= flush_bits;
   }

   /* Update the FCE predicate to perform a fast-clear eliminate. */
   radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);

   radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color);
}
1860
1861 /**
1862 * The parameters mean that same as those in vkCmdClearAttachments.
1863 */
1864 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask,bool ds_resolve_clear)1865 emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1866 const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,
1867 enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear)
1868 {
1869 const struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
1870 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1871 VkImageAspectFlags aspects = clear_att->aspectMask;
1872
1873 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1874 const uint32_t subpass_att = clear_att->colorAttachment;
1875 assert(subpass_att < subpass->color_count);
1876 const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;
1877 if (pass_att == VK_ATTACHMENT_UNUSED)
1878 return;
1879
1880 VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;
1881 bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;
1882 const struct radv_image_view *iview =
1883 fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;
1884 VkClearColorValue clear_value = clear_att->clearValue.color;
1885
1886 if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect,
1887 clear_value, view_mask)) {
1888 radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush);
1889 } else {
1890 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1891 }
1892 } else {
1893 struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;
1894
1895 if (ds_resolve_clear)
1896 ds_att = subpass->ds_resolve_attachment;
1897
1898 if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)
1899 return;
1900
1901 VkImageLayout image_layout = ds_att->layout;
1902 bool in_render_loop = ds_att->in_render_loop;
1903 const struct radv_image_view *iview =
1904 fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;
1905 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1906
1907 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1908
1909 if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects,
1910 clear_rect, clear_value, view_mask)) {
1911 radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush);
1912 } else {
1913 emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask,
1914 ds_resolve_clear);
1915 }
1916 }
1917 }
1918
1919 static inline bool
radv_attachment_needs_clear(struct radv_cmd_state * cmd_state,uint32_t a)1920 radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)
1921 {
1922 uint32_t view_mask = cmd_state->subpass->view_mask;
1923 return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects &&
1924 (!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));
1925 }
1926
1927 static bool
radv_subpass_needs_clear(struct radv_cmd_buffer * cmd_buffer)1928 radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)
1929 {
1930 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1931 uint32_t a;
1932
1933 if (!cmd_state->subpass)
1934 return false;
1935
1936 for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
1937 a = cmd_state->subpass->color_attachments[i].attachment;
1938 if (radv_attachment_needs_clear(cmd_state, a))
1939 return true;
1940 }
1941
1942 if (cmd_state->subpass->depth_stencil_attachment) {
1943 a = cmd_state->subpass->depth_stencil_attachment->attachment;
1944 if (radv_attachment_needs_clear(cmd_state, a))
1945 return true;
1946 }
1947
1948 if (!cmd_state->subpass->ds_resolve_attachment)
1949 return false;
1950
1951 a = cmd_state->subpass->ds_resolve_attachment->attachment;
1952 return radv_attachment_needs_clear(cmd_state, a);
1953 }
1954
1955 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,struct radv_attachment_state * attachment,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,bool ds_resolve_clear)1956 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,
1957 struct radv_attachment_state *attachment,
1958 const VkClearAttachment *clear_att,
1959 enum radv_cmd_flush_bits *pre_flush,
1960 enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear)
1961 {
1962 struct radv_cmd_state *cmd_state = &cmd_buffer->state;
1963 uint32_t view_mask = cmd_state->subpass->view_mask;
1964
1965 VkClearRect clear_rect = {
1966 .rect = cmd_state->render_area,
1967 .baseArrayLayer = 0,
1968 .layerCount = cmd_state->framebuffer->layers,
1969 };
1970
1971 radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1972
1973 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,
1974 view_mask & ~attachment->cleared_views, ds_resolve_clear);
1975 if (view_mask)
1976 attachment->cleared_views |= view_mask;
1977 else
1978 attachment->pending_clear_aspects = 0;
1979
1980 radv_describe_end_render_pass_clear(cmd_buffer);
1981 }
1982
/**
 * Emit any pending attachment clears for the current subpass.
 *
 * Color attachments are processed first, then the depth/stencil
 * attachment, then the depth/stencil resolve attachment.
 *
 * @see radv_attachment_state::pending_clear_aspects
 */
void
radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_cmd_state *cmd_state = &cmd_buffer->state;
   struct radv_meta_saved_state saved_state;
   enum radv_cmd_flush_bits pre_flush = 0;
   enum radv_cmd_flush_bits post_flush = 0;

   if (!radv_subpass_needs_clear(cmd_buffer))
      return;

   /* Subpass clear should not be affected by conditional rendering. */
   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);

   for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {
      uint32_t a = cmd_state->subpass->color_attachments[i].attachment;

      if (!radv_attachment_needs_clear(cmd_state, a))
         continue;

      assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);

      VkClearAttachment clear_att = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .colorAttachment = i, /* Use attachment index relative to subpass */
         .clearValue = cmd_state->attachments[a].clear_value,
      };

      radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush,
                                    &post_flush, false);
   }

   if (cmd_state->subpass->depth_stencil_attachment) {
      uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;
      if (radv_attachment_needs_clear(cmd_state, ds)) {
         VkClearAttachment clear_att = {
            .aspectMask = cmd_state->attachments[ds].pending_clear_aspects,
            .clearValue = cmd_state->attachments[ds].clear_value,
         };

         radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att,
                                       &pre_flush, &post_flush, false);
      }
   }

   if (cmd_state->subpass->ds_resolve_attachment) {
      uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;
      if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {
         VkClearAttachment clear_att = {
            .aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,
            .clearValue = cmd_state->attachments[ds_resolve].clear_value,
         };

         /* ds_resolve_clear=true routes the clear to the resolve attachment. */
         radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att,
                                       &pre_flush, &post_flush, true);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);
   cmd_buffer->state.flush_bits |= post_flush;
}
2051
/**
 * Slow-path clear of a single mip level (and a range of layers) of an image
 * by starting a one-off dynamic rendering pass and going through
 * emit_clear().
 *
 * "format" may differ from the image's own format when the caller has
 * substituted a compatible one (see radv_cmd_clear_image).
 */
static void
radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                       VkImageLayout image_layout, const VkImageSubresourceRange *range,
                       VkFormat format, int level, unsigned layer_count,
                       const VkClearValue *clear_val)
{
   struct radv_image_view iview;
   /* Extent of the selected mip level. */
   uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);
   uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = format,
                           .subresourceRange = {.aspectMask = range->aspectMask,
                                                .baseMipLevel = range->baseMipLevel + level,
                                                .levelCount = 1,
                                                .baseArrayLayer = range->baseArrayLayer,
                                                .layerCount = layer_count},
                        },
                        0, NULL);

   VkClearAttachment clear_att = {
      .aspectMask = range->aspectMask,
      .colorAttachment = 0,
      .clearValue = *clear_val,
   };

   VkClearRect clear_rect = {
      .rect =
         {
            .offset = {0, 0},
            .extent = {width, height},
         },
      .baseArrayLayer = 0,
      .layerCount = layer_count,
   };

   /* Bind the view as either a color attachment or depth/stencil
    * attachment(s) depending on the cleared aspects.
    */
   VkRenderingAttachmentInfo color_att = {0}, depth_att = {0}, stencil_att = {0};

   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      color_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
      color_att.imageView = radv_image_view_to_handle(&iview);
      color_att.imageLayout = image_layout;
      color_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
      color_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
   } else {
      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         depth_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
         depth_att.imageView = radv_image_view_to_handle(&iview);
         depth_att.imageLayout = image_layout;
         depth_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
         depth_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      }

      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
         stencil_att.sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO;
         stencil_att.imageView = radv_image_view_to_handle(&iview);
         stencil_att.imageLayout = image_layout;
         stencil_att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
         stencil_att.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
      }
   }

   VkRenderingInfo rendering_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .renderArea = {
         .offset = { 0, 0 },
         .extent = { width, height },
      },
      .layerCount = layer_count,
   };

   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      rendering_info.colorAttachmentCount = 1;
      rendering_info.pColorAttachments = &color_att;
   } else {
      if (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)
         rendering_info.pDepthAttachment = &depth_att;
      if (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT)
         rendering_info.pStencilAttachment = &stencil_att;
   }

   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);

   emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);

   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));

   radv_image_view_finish(&iview);
}
2145
/**
 * Try to fast clear the given subresource range.
 *
 * Return TRUE if a fast color or depth clear has been performed.
 *
 * NOTE(review): the temporary view is created with the image's own format;
 * the "format" parameter only selects the color vs. depth path below.
 */
static bool
radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
                      VkImageLayout image_layout, bool in_render_loop,
                      const VkImageSubresourceRange *range, const VkClearValue *clear_val)
{
   struct radv_image_view iview;
   bool fast_cleared = false;

   radv_image_view_init(&iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = image->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = range->aspectMask,
                                 .baseMipLevel = range->baseMipLevel,
                                 .levelCount = range->levelCount,
                                 .baseArrayLayer = range->baseArrayLayer,
                                 .layerCount = range->layerCount,
                              },
                        },
                        0, NULL);

   /* Rect covering the full extent of the base mip level; the can-fast-clear
    * helpers reject anything that isn't a whole-image clear.
    */
   VkClearRect clear_rect = {
      .rect =
         {
            .offset = {0, 0},
            .extent =
               {
                  radv_minify(image->info.width, range->baseMipLevel),
                  radv_minify(image->info.height, range->baseMipLevel),
               },
         },
      .baseArrayLayer = range->baseArrayLayer,
      .layerCount = range->layerCount,
   };

   VkClearAttachment clear_att = {
      .aspectMask = range->aspectMask,
      .colorAttachment = 0,
      .clearValue = *clear_val,
   };

   if (vk_format_is_color(format)) {
      if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect,
                                    clear_att.clearValue.color, 0)) {
         radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL,
                               NULL);
         fast_cleared = true;
      }
   } else {
      if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop,
                                    range->aspectMask, &clear_rect,
                                    clear_att.clearValue.depthStencil, 0)) {
         radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);
         fast_cleared = true;
      }
   }

   radv_image_view_finish(&iview);
   return fast_cleared;
}
2213
/**
 * Common implementation of vkCmdClearColorImage / vkCmdClearDepthStencilImage.
 *
 * For each range, a metadata fast clear is attempted first; otherwise every
 * mip level / layer is cleared either with a compute dispatch (cs=true) or
 * via a draw through radv_clear_image_layer(). Formats the hardware can't
 * render/store directly (E5B9G9R9, R4G4) are substituted with compatible
 * integer formats and a re-packed clear value.
 */
static void
radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                     VkImageLayout image_layout, const VkClearValue *clear_value,
                     uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)
{
   VkFormat format = image->vk.format;
   VkClearValue internal_clear_value;

   if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
      internal_clear_value.color = clear_value->color;
   else
      internal_clear_value.depthStencil = clear_value->depthStencil;

   bool disable_compression = false;

   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
      bool blendable;
      /* If the hardware can't store/render this format directly, clear as
       * R32_UINT with the rgb9e5-packed value instead.
       */
      if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
             : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format,
                                                     &blendable)) {
         format = VK_FORMAT_R32_UINT;
         internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

         uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf,
                                                            cmd_buffer->qf);

         for (uint32_t r = 0; r < range_count; r++) {
            const VkImageSubresourceRange *range = &ranges[r];

            /* Don't use compressed image stores because they will use an incompatible format. */
            if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
                                           image_layout, false, queue_mask)) {
               disable_compression = cs;
               break;
            }
         }
      }
   }

   if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
      /* Pack the two 4-bit channels into a single byte and clear as R8_UINT. */
      uint8_t r, g;
      format = VK_FORMAT_R8_UINT;
      r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
      g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
      internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];

      /* Try to perform a fast clear first, otherwise fallback to
       * the legacy path.
       */
      if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range,
                                       &internal_clear_value)) {
         continue;
      }

      for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {
         /* For 3D images, "layers" are the depth slices of the mip level. */
         const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
                                         ? radv_minify(image->info.depth, range->baseMipLevel + l)
                                         : radv_get_layerCount(image, range);

         if (cs) {
            for (uint32_t s = 0; s < layer_count; ++s) {
               struct radv_meta_blit2d_surf surf;
               surf.format = format;
               surf.image = image;
               surf.level = range->baseMipLevel + l;
               surf.layer = range->baseArrayLayer + s;
               surf.aspect_mask = range->aspectMask;
               surf.disable_compression = disable_compression;
               radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
            }
         } else {
            assert(!disable_compression);
            radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
                                   &internal_clear_value);
         }
      }
   }

   if (disable_compression) {
      /* The uncompressed stores left DCC inconsistent; mark it as
       * fully decompressed.
       */
      enum radv_cmd_flush_bits flush_bits = 0;
      for (unsigned i = 0; i < range_count; i++) {
         if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
            flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
      }
      cmd_buffer->state.flush_bits |= flush_bits;
   }
}
2305
2306 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2307 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2308 const VkClearColorValue *pColor, uint32_t rangeCount,
2309 const VkImageSubresourceRange *pRanges)
2310 {
2311 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2312 RADV_FROM_HANDLE(radv_image, image, image_h);
2313 struct radv_meta_saved_state saved_state;
2314 bool cs;
2315
2316 cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE ||
2317 !radv_image_is_renderable(cmd_buffer->device, image);
2318
2319 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2320 */
2321 enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2322 if (cs)
2323 save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2324 else
2325 save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2326
2327 radv_meta_save(&saved_state, cmd_buffer, save_flags);
2328
2329 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,
2330 pRanges, cs);
2331
2332 radv_meta_restore(&saved_state, cmd_buffer);
2333 }
2334
2335 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2336 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,
2337 VkImageLayout imageLayout,
2338 const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2339 const VkImageSubresourceRange *pRanges)
2340 {
2341 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2342 RADV_FROM_HANDLE(radv_image, image, image_h);
2343 struct radv_meta_saved_state saved_state;
2344
2345 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2346 radv_meta_save(
2347 &saved_state, cmd_buffer,
2348 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2349
2350 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,
2351 rangeCount, pRanges, false);
2352
2353 radv_meta_restore(&saved_state, cmd_buffer);
2354 }
2355
2356 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2357 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
2358 const VkClearAttachment *pAttachments, uint32_t rectCount,
2359 const VkClearRect *pRects)
2360 {
2361 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2362 struct radv_meta_saved_state saved_state;
2363 enum radv_cmd_flush_bits pre_flush = 0;
2364 enum radv_cmd_flush_bits post_flush = 0;
2365
2366 if (!cmd_buffer->state.subpass)
2367 return;
2368
2369 radv_meta_save(&saved_state, cmd_buffer,
2370 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2371
2372 /* FINISHME: We can do better than this dumb loop. It thrashes too much
2373 * state.
2374 */
2375 for (uint32_t a = 0; a < attachmentCount; ++a) {
2376 for (uint32_t r = 0; r < rectCount; ++r) {
2377 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2378 cmd_buffer->state.subpass->view_mask, false);
2379 }
2380 }
2381
2382 radv_meta_restore(&saved_state, cmd_buffer);
2383 cmd_buffer->state.flush_bits |= post_flush;
2384 }
2385