1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "nir/nir_builder.h"
8 #include "radv_debug.h"
9 #include "radv_entrypoints.h"
10 #include "radv_formats.h"
11 #include "radv_meta.h"
12
13 #include "util/format_rgb9e5.h"
14 #include "vk_common_entrypoints.h"
15 #include "vk_format.h"
16 #include "vk_shader_module.h"
17
18 #include "ac_formats.h"
19
20 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)21 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
22 uint32_t frag_output)
23 {
24 nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
25 nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
26
27 const struct glsl_type *position_type = glsl_vec4_type();
28 const struct glsl_type *color_type = glsl_vec4_type();
29
30 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
31 vs_out_pos->data.location = VARYING_SLOT_POS;
32
33 nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
34
35 nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
36 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
37
38 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
39
40 nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
41 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
42
43 const struct glsl_type *layer_type = glsl_int_type();
44 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
45 vs_out_layer->data.location = VARYING_SLOT_LAYER;
46 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
47 nir_def *inst_id = nir_load_instance_id(&vs_b);
48 nir_def *base_instance = nir_load_base_instance(&vs_b);
49
50 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
51 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
52
53 *out_vs = vs_b.shader;
54 *out_fs = fs_b.shader;
55 }
56
57 static VkResult
get_color_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout_out)58 get_color_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
59 {
60 enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_CLEAR_COLOR;
61
62 const VkPushConstantRange pc_range = {
63 .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
64 .size = 16,
65 };
66
67 return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, NULL, &pc_range, &key, sizeof(key),
68 layout_out);
69 }
70
/* Pipeline-cache key for color-clear graphics pipelines. Instances are
 * memset to zero before use so padding bytes hash deterministically.
 */
struct radv_clear_color_key {
   enum radv_meta_object_key_type type; /* always RADV_META_OBJECT_KEY_CLEAR_COLOR */
   uint8_t samples;                     /* rasterization sample count */
   uint8_t frag_output;                 /* index of the color attachment being cleared */
   uint32_t fs_key;                     /* format-derived FS key (radv_format_meta_fs_key) */
};
77
static VkResult
get_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
                   VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
{
   /* Look up (or create on first use) the graphics pipeline that clears one
    * color attachment by rasterizing a full-screen rect, keyed on the sample
    * count, the attachment index and the format-derived FS key.
    */
   const uint32_t fs_key = radv_format_meta_fs_key(device, format);
   struct radv_clear_color_key key;
   VkResult result;

   result = get_color_pipeline_layout(device, layout_out);
   if (result != VK_SUCCESS)
      return result;

   /* memset so padding bytes hash consistently in the cache key. */
   memset(&key, 0, sizeof(key));
   key.type = RADV_META_OBJECT_KEY_CLEAR_COLOR;
   key.samples = samples;
   key.frag_output = frag_output;
   key.fs_key = fs_key;

   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline_out = pipeline_from_cache;
      return VK_SUCCESS;
   }

   nir_shader *vs_module, *fs_module;

   build_color_shaders(device, &vs_module, &fs_module, frag_output);

   /* Only the attachment being cleared has its color writemask enabled; the
    * other RTs keep a zeroed (write-disabled) blend attachment state.
    */
   VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
   blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
      .blendEnable = false,
      .colorWriteMask =
         VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
   };

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .stageCount = fs_module ? 2 : 1,
      .pStages =
         (VkPipelineShaderStageCreateInfo[]){
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vk_shader_module_handle_from_nir(vs_module),
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = vk_shader_module_handle_from_nir(fs_module),
               .pName = "main",
            },
         },
      .pVertexInputState =
         &(VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 0,
            .vertexAttributeDescriptionCount = 0,
         },
      .pInputAssemblyState =
         &(VkPipelineInputAssemblyStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            /* The meta rect-list topology: 3 vertices per rect. */
            .topology = VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA,
            .primitiveRestartEnable = false,
         },
      .pViewportState =
         &(VkPipelineViewportStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
      .pRasterizationState =
         &(VkPipelineRasterizationStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
            .depthBiasEnable = false,
            .depthBiasConstantFactor = 0.0f,
            .depthBiasClamp = 0.0f,
            .depthBiasSlopeFactor = 0.0f,
            .lineWidth = 1.0f,
         },
      .pMultisampleState =
         &(VkPipelineMultisampleStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .rasterizationSamples = samples,
            .sampleShadingEnable = false,
            .pSampleMask = NULL,
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
      .pDepthStencilState =
         &(VkPipelineDepthStencilStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .depthTestEnable = false,
            .depthWriteEnable = false,
            .depthBoundsTestEnable = false,
            .stencilTestEnable = false,
            .minDepthBounds = 0.0f,
            .maxDepthBounds = 1.0f,
         },
      .pColorBlendState =
         &(VkPipelineColorBlendStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = MAX_RTS,
            .pAttachments = blend_attachment_state,
            .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
         },
      .pDynamicState =
         &(VkPipelineDynamicStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 3,
            .pDynamicStates =
               (VkDynamicState[]){
                  VK_DYNAMIC_STATE_VIEWPORT,
                  VK_DYNAMIC_STATE_SCISSOR,
                  VK_DYNAMIC_STATE_STENCIL_REFERENCE,
               },
         },
      .layout = *layout_out,

   };

   struct vk_meta_rendering_info render = {
      .color_attachment_count = MAX_RTS,
   };

   /* All attachment slots report the same format; only frag_output is
    * actually written (see the blend writemask above).
    */
   for (uint32_t i = 0; i < MAX_RTS; i++)
      render.color_attachment_formats[i] = format;

   result = vk_meta_create_graphics_pipeline(&device->vk, &device->meta_state.device, &pipeline_create_info, &render,
                                             &key, sizeof(key), pipeline_out);

   /* The NIR shaders are owned here; vk_meta keeps its own compiled copy. */
   ralloc_free(vs_module);
   ralloc_free(fs_module);
   return result;
}
218
219 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)220 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
221 uint32_t view_mask)
222 {
223 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
224 const struct radv_rendering_state *render = &cmd_buffer->state.render;
225 uint32_t samples;
226 VkFormat format;
227 VkClearColorValue clear_value = clear_att->clearValue.color;
228 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
229 VkPipelineLayout layout;
230 VkPipeline pipeline;
231 VkResult result;
232
233 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
234 assert(clear_att->colorAttachment < render->color_att_count);
235 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
236
237 /* When a framebuffer is bound to the current command buffer, get the
238 * number of samples from it. Otherwise, get the number of samples from
239 * the render pass because it's likely a secondary command buffer.
240 */
241 if (color_att->iview) {
242 samples = color_att->iview->image->vk.samples;
243 format = color_att->iview->vk.format;
244 } else {
245 samples = render->max_samples;
246 format = color_att->format;
247 }
248 assert(format != VK_FORMAT_UNDEFINED);
249
250 assert(util_is_power_of_two_nonzero(samples));
251
252 result = get_color_pipeline(device, samples, clear_att->colorAttachment, format, &pipeline, &layout);
253 if (result != VK_SUCCESS) {
254 vk_command_buffer_set_error(&cmd_buffer->vk, result);
255 return;
256 }
257
258 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
259 &clear_value);
260
261 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
262
263 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
264 &(VkViewport){.x = clear_rect->rect.offset.x,
265 .y = clear_rect->rect.offset.y,
266 .width = clear_rect->rect.extent.width,
267 .height = clear_rect->rect.extent.height,
268 .minDepth = 0.0f,
269 .maxDepth = 1.0f});
270
271 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
272
273 if (view_mask) {
274 u_foreach_bit (i, view_mask)
275 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
276 } else {
277 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
278 }
279 }
280
281 static void
build_depthstencil_shader(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)282 build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
283 bool unrestricted)
284 {
285 nir_builder vs_b = radv_meta_init_shader(
286 dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
287 nir_builder fs_b =
288 radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT,
289 unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
290
291 const struct glsl_type *position_out_type = glsl_vec4_type();
292
293 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
294 vs_out_pos->data.location = VARYING_SLOT_POS;
295
296 nir_def *z;
297 if (unrestricted) {
298 nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
299
300 nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
301 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
302 nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
303
304 z = nir_imm_float(&vs_b, 0.0);
305 } else {
306 z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
307 }
308
309 nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
310 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
311
312 const struct glsl_type *layer_type = glsl_int_type();
313 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
314 vs_out_layer->data.location = VARYING_SLOT_LAYER;
315 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
316 nir_def *inst_id = nir_load_instance_id(&vs_b);
317 nir_def *base_instance = nir_load_base_instance(&vs_b);
318
319 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
320 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
321
322 *out_vs = vs_b.shader;
323 *out_fs = fs_b.shader;
324 }
325
/* Forward declaration — defined below, after the HTILE helpers it relies on. */
static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                                      VkImageLayout image_layout, VkImageAspectFlags aspects,
                                      const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value,
                                      uint32_t view_mask);
330
/* Cache key for the depth/stencil clear pipeline layout; memset to zero
 * before use so padding bytes hash deterministically.
 */
struct radv_clear_ds_layout_key {
   enum radv_meta_object_key_type type; /* always RADV_META_OBJECT_KEY_CLEAR_DS */
   bool unrestricted;                   /* VK_EXT_depth_range_unrestricted enabled */
};
335
336 static VkResult
get_depth_stencil_pipeline_layout(struct radv_device * device,bool unrestricted,VkPipelineLayout * layout_out)337 get_depth_stencil_pipeline_layout(struct radv_device *device, bool unrestricted, VkPipelineLayout *layout_out)
338 {
339 struct radv_clear_ds_layout_key key;
340
341 memset(&key, 0, sizeof(key));
342 key.type = RADV_META_OBJECT_KEY_CLEAR_DS;
343 key.unrestricted = unrestricted;
344
345 const VkPushConstantRange pc_range = {
346 .stageFlags = unrestricted ? VK_SHADER_STAGE_FRAGMENT_BIT : VK_SHADER_STAGE_VERTEX_BIT,
347 .size = 4,
348 };
349
350 return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, NULL, &pc_range, &key, sizeof(key),
351 layout_out);
352 }
353
/* Pipeline-cache key for depth/stencil clear pipelines; memset to zero
 * before use so padding bytes hash deterministically.
 */
struct radv_clear_ds_key {
   enum radv_meta_object_key_type type; /* always RADV_META_OBJECT_KEY_CLEAR_DS */
   VkImageAspectFlags aspects;          /* depth and/or stencil aspects cleared */
   uint8_t samples;                     /* rasterization sample count */
   bool fast;                           /* pipeline programs DB fast-clear state */
   bool unrestricted;                   /* VK_EXT_depth_range_unrestricted enabled */
};
361
static VkResult
get_depth_stencil_pipeline(struct radv_device *device, int samples, VkImageAspectFlags aspects, bool fast,
                           VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
{
   /* Look up (or create on first use) the graphics pipeline that clears the
    * requested depth/stencil aspects with a full-screen rect, optionally
    * programming the DB fast-clear path.
    */
   const bool unrestricted = device->vk.enabled_extensions.EXT_depth_range_unrestricted;
   struct radv_clear_ds_key key;
   VkResult result;

   result = get_depth_stencil_pipeline_layout(device, unrestricted, layout_out);
   if (result != VK_SUCCESS)
      return result;

   /* memset so padding bytes hash consistently in the cache key. */
   memset(&key, 0, sizeof(key));
   key.type = RADV_META_OBJECT_KEY_CLEAR_DS;
   key.aspects = aspects;
   key.samples = samples;
   key.fast = fast;
   key.unrestricted = unrestricted;

   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline_out = pipeline_from_cache;
      return VK_SUCCESS;
   }

   nir_shader *vs_module, *fs_module;

   build_depthstencil_shader(device, &vs_module, &fs_module, unrestricted);

   /* RADV-specific extension struct: request DB depth/stencil fast-clear
    * register state for the aspects being cleared.
    */
   VkGraphicsPipelineCreateInfoRADV radv_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO_RADV,
   };

   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      radv_info.db_depth_clear = fast;
   }
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      radv_info.db_stencil_clear = fast;
   }

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &radv_info,
      .stageCount = fs_module ? 2 : 1,
      .pStages =
         (VkPipelineShaderStageCreateInfo[]){
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vk_shader_module_handle_from_nir(vs_module),
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = vk_shader_module_handle_from_nir(fs_module),
               .pName = "main",
            },
         },
      .pVertexInputState =
         &(VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 0,
            .vertexAttributeDescriptionCount = 0,

         },
      .pInputAssemblyState =
         &(VkPipelineInputAssemblyStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            /* The meta rect-list topology: 3 vertices per rect. */
            .topology = VK_PRIMITIVE_TOPOLOGY_META_RECT_LIST_MESA,
            .primitiveRestartEnable = false,
         },
      .pViewportState =
         &(VkPipelineViewportStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
      .pRasterizationState =
         &(VkPipelineRasterizationStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
            .depthBiasEnable = false,
            .depthBiasConstantFactor = 0.0f,
            .depthBiasClamp = 0.0f,
            .depthBiasSlopeFactor = 0.0f,
            .lineWidth = 1.0f,
         },
      .pMultisampleState =
         &(VkPipelineMultisampleStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .rasterizationSamples = samples,
            .sampleShadingEnable = false,
            .pSampleMask = NULL,
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
      .pDepthStencilState =
         &(VkPipelineDepthStencilStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            /* Always-pass depth test with writes enabled implements the
             * depth clear; REPLACE with a dynamic reference implements the
             * stencil clear.
             */
            .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
            .depthCompareOp = VK_COMPARE_OP_ALWAYS,
            .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
            .depthBoundsTestEnable = false,
            .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
            .front =
               {
                  .passOp = VK_STENCIL_OP_REPLACE,
                  .compareOp = VK_COMPARE_OP_ALWAYS,
                  .writeMask = UINT32_MAX,
                  .reference = 0, /* dynamic */
               },
            .back = {0 /* dont care */},
            .minDepthBounds = 0.0f,
            .maxDepthBounds = 1.0f,
         },
      .pColorBlendState =
         &(VkPipelineColorBlendStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = 0,
            .pAttachments = NULL,
            .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
         },
      .pDynamicState =
         &(VkPipelineDynamicStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 3,
            .pDynamicStates =
               (VkDynamicState[]){
                  VK_DYNAMIC_STATE_VIEWPORT,
                  VK_DYNAMIC_STATE_SCISSOR,
                  VK_DYNAMIC_STATE_STENCIL_REFERENCE,
               },
         },
      .layout = *layout_out,
   };

   struct vk_meta_rendering_info render = {
      .depth_attachment_format = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
      .stencil_attachment_format = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
   };

   result = vk_meta_create_graphics_pipeline(&device->vk, &device->meta_state.device, &pipeline_create_info, &render,
                                             &key, sizeof(key), pipeline_out);

   /* The NIR shaders are owned here; vk_meta keeps its own compiled copy. */
   ralloc_free(vs_module);
   ralloc_free(fs_module);
   return result;
}
515
516 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,VkClearDepthStencilValue clear_value,VkImageAspectFlags aspects,const VkClearRect * clear_rect,uint32_t view_mask,bool can_fast_clear)517 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VkClearDepthStencilValue clear_value,
518 VkImageAspectFlags aspects, const VkClearRect *clear_rect, uint32_t view_mask,
519 bool can_fast_clear)
520 {
521 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
522 const bool unrestricted = device->vk.enabled_extensions.EXT_depth_range_unrestricted;
523 const struct radv_rendering_state *render = &cmd_buffer->state.render;
524 uint32_t samples;
525 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
526 VkPipelineLayout layout;
527 VkPipeline pipeline;
528 VkResult result;
529
530 /* When a framebuffer is bound to the current command buffer, get the
531 * number of samples from it. Otherwise, get the number of samples from
532 * the render pass because it's likely a secondary command buffer.
533 */
534 struct radv_image_view *iview = render->ds_att.iview;
535 if (iview) {
536 samples = iview->image->vk.samples;
537 } else {
538 assert(render->ds_att.format != VK_FORMAT_UNDEFINED);
539 samples = render->max_samples;
540 }
541
542 assert(util_is_power_of_two_nonzero(samples));
543
544 result = get_depth_stencil_pipeline(device, samples, aspects, can_fast_clear, &pipeline, &layout);
545 if (result != VK_SUCCESS) {
546 vk_command_buffer_set_error(&cmd_buffer->vk, result);
547 return;
548 }
549
550 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
551 clear_value.depth = 1.0f;
552
553 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), layout,
554 unrestricted ? VK_SHADER_STAGE_FRAGMENT_BIT : VK_SHADER_STAGE_VERTEX_BIT, 0, 4,
555 &clear_value.depth);
556
557 uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference;
558 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
559 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
560 }
561
562 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
563
564 if (can_fast_clear)
565 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
566
567 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
568 &(VkViewport){.x = clear_rect->rect.offset.x,
569 .y = clear_rect->rect.offset.y,
570 .width = clear_rect->rect.extent.width,
571 .height = clear_rect->rect.extent.height,
572 .minDepth = 0.0f,
573 .maxDepth = 1.0f});
574
575 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
576
577 if (view_mask) {
578 u_foreach_bit (i, view_mask)
579 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
580 } else {
581 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
582 }
583
584 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
585 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
586 }
587 }
588
589 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)590 build_clear_htile_mask_shader(struct radv_device *dev)
591 {
592 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
593 b.shader->info.workgroup_size[0] = 64;
594
595 nir_def *global_id = get_global_ids(&b, 1);
596
597 nir_def *offset = nir_imul_imm(&b, global_id, 16);
598 offset = nir_channel(&b, offset, 0);
599
600 nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);
601
602 nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
603
604 nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
605
606 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
607 nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
608 data = nir_ior(&b, data, nir_channel(&b, constants, 0));
609
610 nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
611
612 return b.shader;
613 }
614
615 static VkResult
get_clear_htile_mask_pipeline(struct radv_device * device,VkPipeline * pipeline_out,VkPipelineLayout * layout_out)616 get_clear_htile_mask_pipeline(struct radv_device *device, VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
617 {
618 enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_CLEAR_HTILE;
619 VkResult result;
620
621 const VkDescriptorSetLayoutBinding binding = {
622 .binding = 0,
623 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
624 .descriptorCount = 1,
625 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
626 };
627
628 const VkDescriptorSetLayoutCreateInfo desc_info = {
629 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
630 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT,
631 .bindingCount = 1,
632 .pBindings = &binding,
633 };
634
635 const VkPushConstantRange pc_range = {
636 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
637 .size = 8,
638 };
639
640 result = vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, &pc_range, &key,
641 sizeof(key), layout_out);
642 if (result != VK_SUCCESS)
643 return result;
644
645 VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
646 if (pipeline_from_cache != VK_NULL_HANDLE) {
647 *pipeline_out = pipeline_from_cache;
648 return VK_SUCCESS;
649 }
650
651 nir_shader *cs = build_clear_htile_mask_shader(device);
652
653 const VkPipelineShaderStageCreateInfo stage_info = {
654 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
655 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
656 .module = vk_shader_module_handle_from_nir(cs),
657 .pName = "main",
658 .pSpecializationInfo = NULL,
659 };
660
661 const VkComputePipelineCreateInfo pipeline_info = {
662 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
663 .stage = stage_info,
664 .flags = 0,
665 .layout = *layout_out,
666 };
667
668 result = vk_meta_create_compute_pipeline(&device->vk, &device->meta_state.device, &pipeline_info, &key, sizeof(key),
669 pipeline_out);
670
671 ralloc_free(cs);
672 return result;
673 }
674
675 static uint32_t
clear_htile_mask(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t htile_value,uint32_t htile_mask)676 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
677 uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
678 {
679 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
680 uint64_t block_count = DIV_ROUND_UP(size, 1024);
681 struct radv_meta_saved_state saved_state;
682 struct radv_buffer dst_buffer;
683 VkPipelineLayout layout;
684 VkPipeline pipeline;
685 VkResult result;
686
687 result = get_clear_htile_mask_pipeline(device, &pipeline, &layout);
688 if (result != VK_SUCCESS) {
689 vk_command_buffer_set_error(&cmd_buffer->vk, result);
690 return 0;
691 }
692
693 radv_meta_save(&saved_state, cmd_buffer,
694 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
695
696 radv_buffer_init(&dst_buffer, device, bo, size, offset);
697
698 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
699
700 radv_meta_push_descriptor_set(
701 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 1,
702 (VkWriteDescriptorSet[]){
703 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
704 .dstBinding = 0,
705 .dstArrayElement = 0,
706 .descriptorCount = 1,
707 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
708 .pBufferInfo =
709 &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}});
710
711 const unsigned constants[2] = {
712 htile_value & htile_mask,
713 ~htile_mask,
714 };
715
716 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 8,
717 constants);
718
719 vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
720
721 radv_buffer_finish(&dst_buffer);
722
723 radv_meta_restore(&saved_state, cmd_buffer);
724
725 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
726 VK_ACCESS_2_SHADER_WRITE_BIT, 0, image, NULL);
727 }
728
static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
                                VkClearDepthStencilValue value)
{
   /* Encode the clear depth/stencil as the 32-bit HTILE word to splat across
    * the HTILE buffer; the layout depends on whether the image carries
    * stencil (and VRS) metadata.
    */
   uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
   uint32_t zmask = 0, smem = 0;
   uint32_t htile_value;
   uint32_t zmin, zmax;

   /* Convert the depth value to 14-bit zmin/zmax values. */
   zmin = lroundf(value.depth * max_zval);
   zmax = zmin; /* cleared surface: min == max == clear depth */

   if (radv_image_tile_stencil_disabled(device, image)) {
      /* Z only (no stencil):
       *
       * |31     18|17      4|3     0|
       * +---------+---------+-------+
       * |  Max Z  |  Min Z  | ZMask |
       */
      htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
   } else {

      /* Z and stencil:
       *
       * |31       12|11 10|9    8|7   6|5   4|3     0|
       * +-----------+-----+------+-----+-----+-------+
       * |  Z Range  |     | SMem | SR1 | SR0 | ZMask |
       *
       * Z, stencil, 4 bit VRS encoding:
       * |31       12| 11 10      |9    8|7   6       |5   4|3     0|
       * +-----------+------------+------+------------+-----+-------+
       * |  Z Range  | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
       */
      uint32_t delta = 0; /* zmax == zmin, so the encoded range delta is 0 */
      uint32_t zrange = ((zmax << 6) | delta);
      uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */

      /* With VRS metadata, bits 6-7 hold the X-rate, so only SR0 is set. */
      if (radv_image_has_vrs_htile(device, image))
         sresults = 0x3;

      htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
   }

   return htile_value;
}
775
776 static uint32_t
radv_get_htile_mask(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,VkImageAspectFlags aspects,bool is_clear)777 radv_get_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, VkImageAspectFlags aspects,
778 bool is_clear)
779 {
780 const struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
781 uint32_t mask = 0;
782
783 if (radv_image_tile_stencil_disabled(device, image)) {
784 /* All the HTILE buffer is used when there is no stencil. */
785 mask = UINT32_MAX;
786 } else {
787 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
788 mask |= 0xfffffc0f;
789 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
790 mask |= 0x000003f0;
791
792 /* Preserve VRS rates during clears but not during initialization. */
793 if (is_clear && radv_image_has_vrs_htile(device, image)) {
794 mask &= ~(0x3 << 6); /* VRS X-rate */
795 mask &= ~(0x3 << 10); /* VRS Y-rate */
796 }
797
798 if (cmd_buffer->qf == RADV_QUEUE_TRANSFER) {
799 /* Clear both aspects on SDMA, it's not ideal but there is no other way to initialize the
800 * HTILE buffer.
801 */
802 mask = UINT32_MAX;
803 }
804 }
805
806 return mask;
807 }
808
809 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)810 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
811 {
812 return value.depth == 1.0f || value.depth == 0.0f;
813 }
814
815 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)816 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
817 {
818 return value.stencil == 0;
819 }
820
static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect,
                          const VkClearDepthStencilValue clear_value, uint32_t view_mask)
{
   /* Decide whether this depth/stencil clear can use the HTILE fast-clear
    * path instead of a rasterized clear.
    */
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);

   if (!iview || !iview->support_fast_clear)
      return false;

   /* HTILE must be compressed in this layout for this queue family. */
   if (!radv_layout_is_htile_compressed(device, iview->image, image_layout,
                                        radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* The clear rect must cover the whole image. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* With multiview, the view mask must cover every array layer; without,
    * the rect's layer range must span all layers.
    */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* Out-of-[0,1] depth (allowed by EXT_depth_range_unrestricted) cannot be
    * encoded in the 14-bit HTILE z range.
    */
   if (device->vk.enabled_extensions.EXT_depth_range_unrestricted && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       (clear_value.depth < 0.0 || clear_value.depth > 1.0))
      return false;

   /* TC-compat HTILE restricts fast clears to canonical values. */
   if (radv_image_is_tc_compat_htile(iview->image) &&
       (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
        ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value))))
      return false;

   if (iview->image->vk.mip_levels > 1) {
      uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
      if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
         /* Do not fast clears if one level can't be fast cleared. */
         return false;
      }
   }

   return true;
}
866
/* Perform an HTILE-based fast clear of the given depth/stencil image view and
 * update the depth/stencil clear metadata. "pre_flush"/"post_flush" let the
 * caller batch cache flushes across multiple consecutive fast clears.
 */
static void
radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      VkClearDepthStencilValue clear_value, VkImageAspectFlags aspects,
                      enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   uint32_t clear_word, flush_bits;

   /* Encode the clear value into the word that will fill the HTILE buffer. */
   clear_word = radv_get_htile_fast_clear_value(device, iview->image, clear_value);

   VkImageSubresourceRange range = {
      .aspectMask = aspects,
      .baseMipLevel = iview->vk.base_mip_level,
      .levelCount = iview->vk.level_count,
      .baseArrayLayer = iview->vk.base_array_layer,
      .layerCount = iview->vk.layer_count,
   };

   if (pre_flush) {
      /* Only emit the flush bits that a previous batched clear hasn't already
       * emitted, then remember everything that is now pending.
       */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT,
                               VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, 0, iview->image, &range) |
         radv_dst_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT, VK_ACCESS_2_SHADER_READ_BIT, 0,
                               iview->image, &range);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word, true);

   if (iview->image->planes[0].surface.has_stencil &&
       !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
      /* Synchronize after performing a depth-only or a stencil-only
       * fast clear because the driver uses an optimized path which
       * performs a read-modify-write operation, and the two separate
       * aspects might use the same HTILE memory.
       */
      cmd_buffer->state.flush_bits |= flush_bits;
   }

   radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
   if (post_flush) {
      *post_flush |= flush_bits;
   }
}
912
/* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
 * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
 */
static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
{
   enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
                                         is_msaa ? "multisampled" : "singlesampled");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   /* One invocation per DCC block (x, y) and per array layer (z). */
   nir_def *global_id = get_global_ids(&b, 3);

   /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
   nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

   /* Compute the coordinates: scale the 2D invocation id up to pixel
    * coordinates of the first pixel of the DCC block, keep the layer in z.
    */
   nir_def *coord = nir_trim_vector(&b, global_id, 2);
   coord = nir_imul(&b, coord, dcc_block_size);
   coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
                    nir_undef(&b, 1, 32));

   /* Storage image at set 0 / binding 0 (see the meta pipeline layout). */
   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   /* Load the clear color values. */
   nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24);

   nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
                            nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3));

   /* Store the clear color values (only sample 0 for MSAA images). */
   nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0),
                         .image_dim = dim, .image_array = true);

   return b.shader;
}
955
956 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)957 radv_get_cmask_fast_clear_value(const struct radv_image *image)
958 {
959 uint32_t value = 0; /* Default value when no DCC. */
960
961 /* The fast-clear value is different for images that have both DCC and
962 * CMASK metadata.
963 */
964 if (radv_image_has_dcc(image)) {
965 /* DCC fast clear with MSAA should clear CMASK to 0xC. */
966 return image->vk.samples > 1 ? 0xcccccccc : 0xffffffff;
967 }
968
969 return value;
970 }
971
972 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)973 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
974 uint32_t value)
975 {
976 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
977 const struct radv_physical_device *pdev = radv_device_physical(device);
978 uint64_t cmask_offset = image->planes[0].surface.cmask_offset;
979 uint64_t size;
980
981 if (pdev->info.gfx_level == GFX9) {
982 /* TODO: clear layers. */
983 size = image->planes[0].surface.cmask_size;
984 } else {
985 unsigned slice_size = image->planes[0].surface.cmask_slice_size;
986
987 cmask_offset += slice_size * range->baseArrayLayer;
988 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
989 }
990
991 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + cmask_offset, size,
992 value);
993 }
994
995 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)996 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
997 uint32_t value)
998 {
999 uint64_t fmask_offset = image->planes[0].surface.fmask_offset;
1000 unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1001 uint64_t size;
1002
1003 /* MSAA images do not support mipmap levels. */
1004 assert(range->baseMipLevel == 0 && vk_image_subresource_level_count(&image->vk, range) == 1);
1005
1006 fmask_offset += slice_size * range->baseArrayLayer;
1007 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1008
1009 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + fmask_offset, size,
1010 value);
1011 }
1012
/* Fill the DCC metadata of the selected mip levels/layers with "value", mark
 * the image as DCC-compressed, and return the flush bits required afterwards.
 * The metadata addressing differs per generation (GFX10+, GFX9, pre-GFX9).
 */
uint32_t
radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
               uint32_t value)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
   uint32_t flush_bits = 0;

   /* Mark the image as being compressed. */
   radv_update_dcc_metadata(cmd_buffer, image, range, true);

   for (uint32_t l = 0; l < level_count; l++) {
      uint64_t dcc_offset = image->planes[0].surface.meta_offset;
      uint32_t level = range->baseMipLevel + l;
      uint64_t size;

      if (pdev->info.gfx_level >= GFX10) {
         /* DCC for mipmaps+layers is currently disabled. */
         dcc_offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
                       image->planes[0].surface.u.gfx9.meta_levels[level].offset;
         size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
      } else if (pdev->info.gfx_level == GFX9) {
         /* Mipmap levels and layers aren't implemented. */
         assert(level == 0);
         size = image->planes[0].surface.meta_size;
      } else {
         const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level];

         /* If dcc_fast_clear_size is 0 (which might happens for
          * mipmaps) the fill buffer operation below is a no-op.
          * This can only happen during initialization as the
          * fast clear path fallbacks to slow clears if one
          * level can't be fast cleared.
          */
         dcc_offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
         size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range);
      }

      /* Do not clear this level if it can't be compressed. */
      if (!size)
         continue;

      flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_image_get_va(image, 0) + dcc_offset,
                                     size, value);
   }

   return flush_bits;
}
1063
/* Get (or create through the meta cache) the pipeline layout used by the
 * comp-to-single DCC clear compute pipeline: one storage image push
 * descriptor and a 24-byte push-constant range (block size + clear color).
 */
static VkResult
get_clear_dcc_comp_to_single_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
{
   enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_CLEAR_DCC_COMP_TO_SINGLE;

   const VkDescriptorSetLayoutBinding binding = {
      .binding = 0,
      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
      .descriptorCount = 1,
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
   };

   const VkDescriptorSetLayoutCreateInfo desc_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT,
      .bindingCount = 1,
      .pBindings = &binding,
   };

   const VkPushConstantRange pc_range = {
      .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      .size = 24,
   };

   return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, &pc_range, &key, sizeof(key),
                                      layout_out);
}
1091
/* Meta-cache key for the comp-to-single DCC clear compute pipeline; one
 * pipeline variant exists per sample count class (single-sampled vs MSAA).
 */
struct radv_clear_dcc_comp_to_single_key {
   enum radv_meta_object_key_type type;
   bool is_msaa;
};
1096
/* Get (or lazily build) the comp-to-single DCC clear compute pipeline and its
 * layout. Pipelines are cached in the meta state, keyed on MSAA-ness.
 */
static VkResult
get_clear_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline_out,
                                      VkPipelineLayout *layout_out)
{
   struct radv_clear_dcc_comp_to_single_key key;
   VkResult result;

   result = get_clear_dcc_comp_to_single_pipeline_layout(device, layout_out);
   if (result != VK_SUCCESS)
      return result;

   /* Zero the whole key so padding bytes hash consistently. */
   memset(&key, 0, sizeof(key));
   key.type = RADV_META_OBJECT_KEY_CLEAR_DCC_COMP_TO_SINGLE;
   key.is_msaa = is_msaa;

   /* Fast path: reuse a previously created pipeline. */
   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline_out = pipeline_from_cache;
      return VK_SUCCESS;
   }

   nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);

   const VkPipelineShaderStageCreateInfo stage_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   const VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = stage_info,
      .flags = 0,
      .layout = *layout_out,
   };

   result = vk_meta_create_compute_pipeline(&device->vk, &device->meta_state.device, &pipeline_info, &key, sizeof(key),
                                            pipeline_out);

   /* The NIR shader is owned by this function; the pipeline keeps its own copy. */
   ralloc_free(cs);
   return result;
}
1141
/* Write the clear color at the start of every 256B block of the image (see
 * build_clear_dcc_comp_to_single_shader) for each selected mip level with DCC
 * enabled. Returns the flush bits required after the compute dispatches.
 */
static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                              const VkImageSubresourceRange *range, uint32_t color_values[4])
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
   unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
   struct radv_meta_saved_state saved_state;
   bool is_msaa = image->vk.samples > 1;
   struct radv_image_view iview;
   VkPipelineLayout layout;
   VkPipeline pipeline;
   VkResult result;
   VkFormat format;

   /* Pick a single-channel (or multi-channel UINT) view format that matches
    * the pixel size, so the shader can store the raw clear value bits.
    */
   switch (bytes_per_pixel) {
   case 1:
      format = VK_FORMAT_R8_UINT;
      break;
   case 2:
      format = VK_FORMAT_R16_UINT;
      break;
   case 4:
      format = VK_FORMAT_R32_UINT;
      break;
   case 8:
      format = VK_FORMAT_R32G32_UINT;
      break;
   case 16:
      format = VK_FORMAT_R32G32B32A32_UINT;
      break;
   default:
      unreachable("Unsupported number of bytes per pixel");
   }

   result = get_clear_dcc_comp_to_single_pipeline(device, is_msaa, &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return 0;
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) {
      uint32_t width, height;

      /* Do not write the clear color value for levels without DCC. */
      if (!radv_dcc_enabled(image, range->baseMipLevel + l))
         continue;

      width = u_minify(image->vk.extent.width, range->baseMipLevel + l);
      height = u_minify(image->vk.extent.height, range->baseMipLevel + l);

      /* Create an uncompressed view of this level so the stores bypass DCC. */
      radv_image_view_init(&iview, device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(image),
                              .viewType = VK_IMAGE_VIEW_TYPE_2D,
                              .format = format,
                              .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                                   .baseMipLevel = range->baseMipLevel + l,
                                                   .levelCount = 1,
                                                   .baseArrayLayer = range->baseArrayLayer,
                                                   .layerCount = layer_count},
                           },
                           &(struct radv_image_view_extra_create_info){.disable_compression = true});

      radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 1,
                                    (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                              .dstBinding = 0,
                                                              .dstArrayElement = 0,
                                                              .descriptorCount = 1,
                                                              .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                              .pImageInfo = (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(&iview),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }}});

      /* One thread per DCC block in each dimension. */
      unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
      unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);

      /* Push constants: DCC block dimensions followed by the clear color
       * (matches the layout read by the shader).
       */
      const unsigned constants[6] = {
         image->planes[0].surface.u.gfx9.color.dcc_block_width,
         image->planes[0].surface.u.gfx9.color.dcc_block_height,
         color_values[0],
         color_values[1],
         color_values[2],
         color_values[3],
      };

      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
                                 constants);

      radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);

      radv_image_view_finish(&iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT,
                                                                 VK_ACCESS_2_SHADER_WRITE_BIT, 0, image, NULL);
}
1251
1252 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value,bool is_clear)1253 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1254 const VkImageSubresourceRange *range, uint32_t value, bool is_clear)
1255 {
1256 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1257 const struct radv_physical_device *pdev = radv_device_physical(device);
1258 uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
1259 uint32_t flush_bits = 0;
1260 uint32_t htile_mask;
1261
1262 htile_mask = radv_get_htile_mask(cmd_buffer, image, range->aspectMask, is_clear);
1263
1264 if (level_count != image->vk.mip_levels) {
1265 assert(pdev->info.gfx_level >= GFX10);
1266
1267 /* Clear individuals levels separately. */
1268 for (uint32_t l = 0; l < level_count; l++) {
1269 uint32_t level = range->baseMipLevel + l;
1270 uint64_t htile_offset =
1271 image->planes[0].surface.meta_offset + image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1272 uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;
1273
1274 /* Do not clear this level if it can be compressed. */
1275 if (!size)
1276 continue;
1277
1278 if (htile_mask == UINT_MAX) {
1279 /* Clear the whole HTILE buffer. */
1280 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1281 radv_image_get_va(image, 0) + htile_offset, size, value);
1282 } else {
1283 /* Only clear depth or stencil bytes in the HTILE buffer. */
1284 flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
1285 image->bindings[0].offset + htile_offset, size, value, htile_mask);
1286 }
1287 }
1288 } else {
1289 unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
1290 uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
1291 uint64_t htile_offset =
1292 image->planes[0].surface.meta_offset + image->planes[0].surface.meta_slice_size * range->baseArrayLayer;
1293
1294 if (htile_mask == UINT_MAX) {
1295 /* Clear the whole HTILE buffer. */
1296 flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1297 radv_image_get_va(image, 0) + htile_offset, size, value);
1298 } else {
1299 /* Only clear depth or stencil bytes in the HTILE buffer. */
1300 flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo,
1301 image->bindings[0].offset + htile_offset, size, value, htile_mask);
1302 }
1303 }
1304
1305 return flush_bits;
1306 }
1307
/* DCC clear codes written into the DCC metadata by radv_clear_dcc(). The
 * GFX8_* values are selected by gfx8_get_fast_clear_parameters(), the
 * GFX11_* ones by gfx11_get_fast_clear_parameters(); *_SINGLE is the
 * comp-to-single code (clear value stored in the image itself).
 */
enum {
   RADV_DCC_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
   RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
   RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
   RADV_DCC_GFX8_CLEAR_REG = 0x20202020U, /* Needs a fast-clear eliminate pass. */
   RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
   RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
   RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
   RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
   RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
   RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
   RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
   RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
};
1323
1324 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1325 radv_dcc_single_clear_value(const struct radv_device *device)
1326 {
1327 const struct radv_physical_device *pdev = radv_device_physical(device);
1328 return pdev->info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE : RADV_DCC_GFX9_CLEAR_SINGLE;
1329 }
1330
/* Compute the pre-GFX11 DCC fast-clear code for "clear_value" and report
 * whether a later fast-clear eliminate (FCE) pass can be avoided. When no
 * dedicated code matches, the defaults set at the top of the function remain
 * (comp-to-single when supported, otherwise the clear-register code that
 * requires an FCE).
 */
static void
gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                               const VkClearColorValue *clear_value, uint32_t *reset_value,
                               bool *can_avoid_fast_clear_elim)
{
   const struct radv_physical_device *pdev = radv_device_physical(device);
   bool values[4] = {0};
   int extra_channel;
   bool main_value = false;
   bool extra_value = false;
   bool has_color = false;
   bool has_alpha = false;

   /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
      *can_avoid_fast_clear_elim = true;
   } else {
      *reset_value = RADV_DCC_GFX8_CLEAR_REG;
      *can_avoid_fast_clear_elim = false;
   }

   /* Determine which channel (if any) plays the "extra" (alpha) role for the
    * 0001/1110 clear codes; -1 means the format has no such channel.
    */
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
       iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
      extra_channel = -1;
   else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      if (ac_alpha_is_on_msb(&pdev->info, radv_format_to_pipe_format(iview->vk.format)))
         extra_channel = desc->nr_channels - 1;
      else
         extra_channel = 0;
   } else
      return;

   /* Each channel must be exactly 0 or the maximum representable value for a
    * dedicated clear code to apply; record which value each channel has.
    */
   for (int i = 0; i < 4; i++) {
      int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
      if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
         continue;

      if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
         /* Use the maximum value for clamping the clear color. */
         int max = u_bit_consecutive(0, desc->channel[i].size - 1);

         values[i] = clear_value->int32[i] != 0;
         if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
            return;
      } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
         /* Use the maximum value for clamping the clear color. */
         unsigned max = u_bit_consecutive(0, desc->channel[i].size);

         values[i] = clear_value->uint32[i] != 0U;
         if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
            return;
      } else {
         values[i] = clear_value->float32[i] != 0.0F;
         if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
            return;
      }

      if (index == extra_channel) {
         extra_value = values[i];
         has_alpha = true;
      } else {
         main_value = values[i];
         has_color = true;
      }
   }

   /* If alpha isn't present, make it the same as color, and vice versa. */
   if (!has_alpha)
      extra_value = main_value;
   else if (!has_color)
      main_value = extra_value;

   /* All color channels must agree for a dedicated clear code. */
   for (int i = 0; i < 4; ++i)
      if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
          desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
         return;

   /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
   if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
      return;

   *can_avoid_fast_clear_elim = true;

   /* Select the dedicated code from the (color, alpha) pair. */
   if (main_value) {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_1111;
      else
         *reset_value = RADV_DCC_GFX8_CLEAR_1110;
   } else {
      if (extra_value)
         *reset_value = RADV_DCC_GFX8_CLEAR_0001;
      else
         *reset_value = RADV_DCC_CLEAR_0000;
   }
}
1428
/* Compute the GFX11 DCC clear code for "clear_value". Returns false when no
 * hardware clear code (nor comp-to-single) matches, in which case a fast
 * clear isn't possible.
 */
static bool
gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
                                const VkClearColorValue *clear_value, uint32_t *reset_value)
{
   const struct util_format_description *desc = vk_format_description(iview->vk.format);
   unsigned start_bit = UINT_MAX;
   unsigned end_bit = 0;

   /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
   if (desc->block.bits <= 16)
      return false;

   /* Find the used bit range. */
   for (unsigned i = 0; i < 4; i++) {
      unsigned swizzle = desc->swizzle[i];

      /* Skip constant swizzles (0/1); only real channels occupy bits. */
      if (swizzle >= PIPE_SWIZZLE_0)
         continue;

      start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
      end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
   }

   /* Pack the clear color into its in-memory representation so the bit
    * patterns below can be examined directly.
    */
   union {
      uint8_t ub[16];
      uint16_t us[8];
      uint32_t ui[4];
   } value;
   memset(&value, 0, sizeof(value));
   util_format_pack_rgba(radv_format_to_pipe_format(iview->vk.format), &value, clear_value, 1);

   /* Check the cases where all components or bits are either all 0 or all 1. */
   bool all_bits_are_0 = true;
   bool all_bits_are_1 = true;
   bool all_words_are_fp16_1 = false;
   bool all_words_are_fp32_1 = false;

   for (unsigned i = start_bit; i < end_bit; i++) {
      bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);

      all_bits_are_0 &= !bit;
      all_bits_are_1 &= bit;
   }

   /* 0x3c00 is 1.0 in FP16. */
   if (start_bit % 16 == 0 && end_bit % 16 == 0) {
      all_words_are_fp16_1 = true;
      for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
         all_words_are_fp16_1 &= value.us[i] == 0x3c00;
   }

   /* 0x3f800000 is 1.0 in FP32. */
   if (start_bit % 32 == 0 && end_bit % 32 == 0) {
      all_words_are_fp32_1 = true;
      for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
         all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
   }

   if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
      if (all_bits_are_0)
         *reset_value = RADV_DCC_CLEAR_0000;
      else if (all_bits_are_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
      else if (all_words_are_fp16_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
      else if (all_words_are_fp32_1)
         *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
      return true;
   }

   /* Check the (0,0,0,1) / (1,1,1,0) patterns per channel layout. */
   if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
      if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
      if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) {
         *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
         return true;
      } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) {
         *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
         return true;
      }
   }

   /* Fall back to comp-to-single when the image supports it. */
   if (iview->image->support_comp_to_single) {
      *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
      return true;
   }

   return false;
}
1530
/* Return true when the requested color clear can be performed with a
 * CMASK/DCC-based fast clear instead of a slow (draw-based) clear.
 */
static bool
radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                          VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
                          uint32_t view_mask)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   uint32_t clear_color[2];

   if (!iview || !iview->support_fast_clear)
      return false;

   /* The layout/queue combination must allow fast clears for this level. */
   if (!radv_layout_can_fast_clear(device, iview->image, iview->vk.base_mip_level, image_layout,
                                   radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
      return false;

   /* The clear rect must cover the whole image. */
   if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
       clear_rect->rect.extent.width != iview->image->vk.extent.width ||
       clear_rect->rect.extent.height != iview->image->vk.extent.height)
      return false;

   /* With multiview, every array layer must be selected by the view mask;
    * without it, the clear must cover all array layers of the image.
    */
   if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
      return false;
   if (!view_mask && clear_rect->baseArrayLayer != 0)
      return false;
   if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
      return false;

   /* DCC */

   /* Images that support comp-to-single clears don't have clear values. */
   if (!iview->image->support_comp_to_single) {
      if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
         return false;

      if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
         return false;
   }

   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      bool can_avoid_fast_clear_elim;
      uint32_t reset_value;

      /* The clear value must map to a hardware DCC clear code. */
      if (pdev->info.gfx_level >= GFX11) {
         if (!gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value))
            return false;
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (iview->image->vk.mip_levels > 1) {
         if (pdev->info.gfx_level >= GFX9) {
            uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
            if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
               /* Do not fast clear if one level can't be fast cleared. */
               return false;
            }
         } else {
            for (uint32_t l = 0; l < iview->vk.level_count; l++) {
               uint32_t level = iview->vk.base_mip_level + l;
               struct legacy_surf_dcc_level *dcc_level =
                  &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];

               /* Do not fast clears if one level can't be
                * fast cleared.
                */
               if (!dcc_level->dcc_fast_clear_size)
                  return false;
            }
         }
      }
   }

   return true;
}
1606
/**
 * Fast clear a color attachment: instead of rasterizing the clear, write the
 * clear value into the image's compression metadata (CMASK and/or DCC) and
 * record the packed clear color so later accesses resolve it correctly.
 *
 * The caller must have validated the clear with radv_can_fast_clear_color().
 *
 * @param pre_flush  optional in/out accumulator of flush bits already emitted
 *                   for a batch of clears; used to skip redundant cache flushes.
 * @param post_flush optional out accumulator of flush bits required after the
 *                   metadata writes performed here.
 */
static void
radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
                      const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
                      enum radv_cmd_flush_bits *post_flush)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkClearColorValue clear_value = clear_att->clearValue.color;
   uint32_t clear_color[4], flush_bits = 0;
   uint32_t cmask_clear_value;
   VkImageSubresourceRange range = vk_image_view_subresource_range(&iview->vk);

   if (pre_flush) {
      /* Flush pending color-attachment writes, but skip any bits the caller
       * already flushed for a previous clear in this batch. */
      enum radv_cmd_flush_bits bits =
         radv_src_access_flush(cmd_buffer, VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT,
                               VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, 0, iview->image, NULL);
      cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
      *pre_flush |= cmd_buffer->state.flush_bits;
   }

   /* DCC: pack the API clear color into the hardware representation for the
    * view format. */
   radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);

   cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);

   /* clear cmask buffer */
   bool need_decompress_pass = false;
   if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
      uint32_t reset_value;
      bool can_avoid_fast_clear_elim = true;

      if (pdev->info.gfx_level >= GFX11) {
         /* Eligibility was already checked by radv_can_fast_clear_color(), so
          * computing the DCC clear parameters must succeed here. */
         ASSERTED bool result = gfx11_get_fast_clear_parameters(device, iview, &clear_value, &reset_value);
         assert(result);
      } else {
         gfx8_get_fast_clear_parameters(device, iview, &clear_value, &reset_value, &can_avoid_fast_clear_elim);
      }

      if (radv_image_has_cmask(iview->image)) {
         flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
      }

      /* If the clear value can't be represented directly in DCC, a fast-clear
       * eliminate pass is required before non-fast-clear-aware accesses. */
      if (!can_avoid_fast_clear_elim)
         need_decompress_pass = true;

      flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);

      if (reset_value == radv_dcc_single_clear_value(device)) {
         /* Write the clear color to the first byte of each 256B block when the image supports DCC
          * fast clears with comp-to-single.
          */
         if (vk_format_get_blocksize(iview->image->vk.format) == 16) {
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_value.uint32);
         } else {
            /* Only the first two dwords of the packed color are used; zero the rest. */
            clear_color[2] = clear_color[3] = 0;
            flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
         }
      }
   } else {
      flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);

      /* Fast clearing with CMASK should always be eliminated. */
      need_decompress_pass = true;
   }

   if (post_flush) {
      *post_flush |= flush_bits;
   }

   /* Update the FCE predicate to perform a fast-clear eliminate. */
   radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);

   radv_update_color_clear_metadata(cmd_buffer, iview, clear_att->colorAttachment, clear_color);
}
1681
1682 /**
1683 * The parameters mean that same as those in vkCmdClearAttachments.
1684 */
1685 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask)1686 emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
1687 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
1688 {
1689 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1690 VkImageAspectFlags aspects = clear_att->aspectMask;
1691
1692 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1693 assert(clear_att->colorAttachment < render->color_att_count);
1694 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
1695
1696 if (color_att->format == VK_FORMAT_UNDEFINED)
1697 return;
1698
1699 VkClearColorValue clear_value = clear_att->clearValue.color;
1700
1701 if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value,
1702 view_mask)) {
1703 radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush);
1704 } else {
1705 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1706 }
1707 } else {
1708 const struct radv_attachment *ds_att = &render->ds_att;
1709
1710 if (ds_att->format == VK_FORMAT_UNDEFINED)
1711 return;
1712
1713 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1714
1715 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1716 bool can_fast_clear_depth = false;
1717 bool can_fast_clear_stencil = false;
1718 if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) &&
1719 ds_att->layout != ds_att->stencil_layout) {
1720 can_fast_clear_depth = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects,
1721 clear_rect, clear_value, view_mask);
1722 can_fast_clear_stencil = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->stencil_layout, aspects,
1723 clear_rect, clear_value, view_mask);
1724 } else {
1725 VkImageLayout layout = aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? ds_att->layout : ds_att->stencil_layout;
1726 can_fast_clear_depth =
1727 radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, layout, aspects, clear_rect, clear_value, view_mask);
1728 can_fast_clear_stencil = can_fast_clear_depth;
1729 }
1730
1731 if (can_fast_clear_depth && can_fast_clear_stencil) {
1732 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil, clear_att->aspectMask,
1733 pre_flush, post_flush);
1734 } else if (!can_fast_clear_depth && !can_fast_clear_stencil) {
1735 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, clear_att->aspectMask, clear_rect,
1736 view_mask, false);
1737 } else {
1738 if (can_fast_clear_depth) {
1739 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1740 VK_IMAGE_ASPECT_DEPTH_BIT, pre_flush, post_flush);
1741 } else {
1742 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_DEPTH_BIT,
1743 clear_rect, view_mask, can_fast_clear_depth);
1744 }
1745
1746 if (can_fast_clear_stencil) {
1747 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1748 VK_IMAGE_ASPECT_STENCIL_BIT, pre_flush, post_flush);
1749 } else {
1750 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_STENCIL_BIT,
1751 clear_rect, view_mask, can_fast_clear_stencil);
1752 }
1753 }
1754 }
1755 }
1756
1757 static bool
radv_rendering_needs_clear(const VkRenderingInfo * pRenderingInfo)1758 radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo)
1759 {
1760 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
1761 if (pRenderingInfo->pColorAttachments[i].imageView != VK_NULL_HANDLE &&
1762 pRenderingInfo->pColorAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1763 return true;
1764 }
1765
1766 if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1767 pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1768 return true;
1769
1770 if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1771 pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1772 return true;
1773
1774 return false;
1775 }
1776
1777 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1778 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1779 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
1780 {
1781 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1782
1783 VkClearRect clear_rect = {
1784 .rect = render->area,
1785 .baseArrayLayer = 0,
1786 .layerCount = render->layer_count,
1787 };
1788
1789 radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1790
1791 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush, render->view_mask);
1792
1793 radv_describe_end_render_pass_clear(cmd_buffer);
1794 }
1795
/**
 * Emit any pending attachment clears for the current subpass.
 *
 * Called at vkCmdBeginRendering time to perform the LOAD_OP_CLEAR clears
 * requested by pRenderingInfo, using the already-populated
 * cmd_buffer->state.render attachments.
 *
 * @see radv_attachment_state::pending_clear_aspects
 */
void
radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo)
{
   const struct radv_rendering_state *render = &cmd_buffer->state.render;
   struct radv_meta_saved_state saved_state;
   enum radv_cmd_flush_bits pre_flush = 0;
   enum radv_cmd_flush_bits post_flush = 0;

   /* Early out when no attachment uses LOAD_OP_CLEAR. */
   if (!radv_rendering_needs_clear(pRenderingInfo))
      return;

   /* Subpass clear should not be affected by conditional rendering. */
   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);

   assert(render->color_att_count == pRenderingInfo->colorAttachmentCount);
   for (uint32_t i = 0; i < render->color_att_count; i++) {
      if (render->color_att[i].iview == NULL ||
          pRenderingInfo->pColorAttachments[i].loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
         continue;

      VkClearAttachment clear_att = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .colorAttachment = i,
         .clearValue = pRenderingInfo->pColorAttachments[i].clearValue,
      };

      radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
   }

   if (render->ds_att.iview != NULL) {
      /* Accumulate depth and/or stencil into a single clear so a combined
       * depth+stencil clear is emitted in one pass when both are requested. */
      VkClearAttachment clear_att = {.aspectMask = 0};

      if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
          pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
         clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
      }

      if (pRenderingInfo->pStencilAttachment != NULL &&
          pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
          pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
         clear_att.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
         clear_att.clearValue.depthStencil.stencil =
            pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
      }

      if (clear_att.aspectMask != 0) {
         radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);
   /* Defer the flushes required by any fast clears to the command stream. */
   cmd_buffer->state.flush_bits |= post_flush;
}
1856
/**
 * Slow-path clear of one mip level of an image: bind the level (and the given
 * layers) as a rendering attachment and run emit_clear() inside a temporary
 * dynamic-rendering instance.
 *
 * @param format      view format to use (may differ from image->vk.format when
 *                    the caller substituted a clear-compatible format).
 * @param level       mip level relative to range->baseMipLevel.
 * @param layer_count number of array layers (or 3D slices) to clear.
 */
static void
radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
                       const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count,
                       const VkClearValue *clear_val)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view iview;
   uint32_t width = u_minify(image->vk.extent.width, range->baseMipLevel + level);
   uint32_t height = u_minify(image->vk.extent.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = format,
                           .subresourceRange = {.aspectMask = range->aspectMask,
                                                .baseMipLevel = range->baseMipLevel + level,
                                                .levelCount = 1,
                                                .baseArrayLayer = range->baseArrayLayer,
                                                .layerCount = layer_count},
                        },
                        NULL);

   VkClearAttachment clear_att = {
      .aspectMask = range->aspectMask,
      .colorAttachment = 0,
      .clearValue = *clear_val,
   };

   VkClearRect clear_rect = {
      .rect =
         {
            .offset = {0, 0},
            .extent = {width, height},
         },
      .baseArrayLayer = 0,
      .layerCount = layer_count,
   };

   /* LOAD_OP_LOAD: the clear itself is performed by emit_clear() below, not
    * by the render pass load operation. */
   VkRenderingAttachmentInfo att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = radv_image_view_to_handle(&iview),
      .imageLayout = image_layout,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
   };

   VkRenderingInfo rendering_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .flags = VK_RENDERING_INPUT_ATTACHMENT_NO_CONCURRENT_WRITES_BIT_MESA,
      .renderArea =
         {
            .offset = {0, 0},
            .extent = {width, height},
         },
      .layerCount = layer_count,
   };

   /* Bind the single attachment to the slot(s) matching the image's aspects. */
   if (image->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
      rendering_info.colorAttachmentCount = 1;
      rendering_info.pColorAttachments = &att;
   }
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
      rendering_info.pDepthAttachment = &att;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
      rendering_info.pStencilAttachment = &att;

   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);

   /* No pre/post flush accumulation and no multiview here. */
   emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);

   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));

   radv_image_view_finish(&iview);
}
1933
1934 /**
1935 * Return TRUE if a fast color or depth clear has been performed.
1936 */
1937 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,const VkImageSubresourceRange * range,const VkClearValue * clear_val)1938 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
1939 VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val)
1940 {
1941 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
1942 struct radv_image_view iview;
1943 bool fast_cleared = false;
1944
1945 radv_image_view_init(&iview, device,
1946 &(VkImageViewCreateInfo){
1947 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1948 .image = radv_image_to_handle(image),
1949 .viewType = radv_meta_get_view_type(image),
1950 .format = image->vk.format,
1951 .subresourceRange =
1952 {
1953 .aspectMask = range->aspectMask,
1954 .baseMipLevel = range->baseMipLevel,
1955 .levelCount = vk_image_subresource_level_count(&image->vk, range),
1956 .baseArrayLayer = range->baseArrayLayer,
1957 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
1958 },
1959 },
1960 NULL);
1961
1962 VkClearRect clear_rect = {
1963 .rect =
1964 {
1965 .offset = {0, 0},
1966 .extent =
1967 {
1968 u_minify(image->vk.extent.width, range->baseMipLevel),
1969 u_minify(image->vk.extent.height, range->baseMipLevel),
1970 },
1971 },
1972 .baseArrayLayer = range->baseArrayLayer,
1973 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
1974 };
1975
1976 VkClearAttachment clear_att = {
1977 .aspectMask = range->aspectMask,
1978 .colorAttachment = 0,
1979 .clearValue = *clear_val,
1980 };
1981
1982 if (vk_format_is_color(format)) {
1983 if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) {
1984 radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL);
1985 fast_cleared = true;
1986 }
1987 } else {
1988 if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect,
1989 clear_att.clearValue.depthStencil, 0)) {
1990 radv_fast_clear_depth(cmd_buffer, &iview, clear_att.clearValue.depthStencil, clear_att.aspectMask, NULL, NULL);
1991 fast_cleared = true;
1992 }
1993 }
1994
1995 radv_image_view_finish(&iview);
1996 return fast_cleared;
1997 }
1998
/**
 * Common implementation for vkCmdClearColorImage/vkCmdClearDepthStencilImage.
 *
 * Substitutes clear-compatible formats where needed, tries a fast (metadata)
 * clear per range, and otherwise clears level by level via compute (cs=true)
 * or rasterization (cs=false).
 *
 * NOTE(review): only ranges[0].aspectMask is inspected to pick color vs
 * depth/stencil — presumably all ranges of one command share the aspect class;
 * confirm against the callers.
 */
static void
radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
                     const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
                     bool cs)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);
   VkFormat format = image->vk.format;
   VkClearValue internal_clear_value;

   if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
      internal_clear_value.color = clear_value->color;
   else
      internal_clear_value.depthStencil = clear_value->depthStencil;

   bool disable_compression = false;

   /* E5B9G9R9 may not be clearable directly: fall back to clearing the raw
    * 32-bit encoding through an R32_UINT view. */
   if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
      if (cs ? !radv_is_storage_image_format_supported(pdev, format)
             : !radv_is_colorbuffer_format_supported(pdev, format)) {
         format = VK_FORMAT_R32_UINT;
         internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);

         uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);

         for (uint32_t r = 0; r < range_count; r++) {
            const VkImageSubresourceRange *range = &ranges[r];

            /* Don't use compressed image stores because they will use an incompatible format. */
            if (radv_layout_dcc_compressed(device, image, range->baseMipLevel, image_layout, queue_mask)) {
               disable_compression = cs;
               break;
            }
         }
      }
   }

   /* R4G4 has no renderable/storable equivalent: pack the two 4-bit channels
    * into a byte and clear through an R8_UINT view. */
   if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
      uint8_t r, g;
      format = VK_FORMAT_R8_UINT;
      r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
      g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
      internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
   }

   for (uint32_t r = 0; r < range_count; r++) {
      const VkImageSubresourceRange *range = &ranges[r];

      /* Try to perform a fast clear first, otherwise fallback to
       * the legacy path.
       */
      if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) {
         continue;
      }

      for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); ++l) {
         /* For 3D images, "layers" are the depth slices of this mip level. */
         const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
                                         ? u_minify(image->vk.extent.depth, range->baseMipLevel + l)
                                         : vk_image_subresource_layer_count(&image->vk, range);
         if (cs) {
            for (uint32_t s = 0; s < layer_count; ++s) {
               struct radv_meta_blit2d_surf surf;
               surf.format = format;
               surf.image = image;
               surf.level = range->baseMipLevel + l;
               surf.layer = range->baseArrayLayer + s;
               surf.aspect_mask = range->aspectMask;
               surf.disable_compression = disable_compression;
               radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
            }
         } else {
            assert(!disable_compression);
            radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
                                   &internal_clear_value);
         }
      }
   }

   /* Uncompressed stores left DCC stale: reset it to the uncompressed state. */
   if (disable_compression) {
      enum radv_cmd_flush_bits flush_bits = 0;
      for (unsigned i = 0; i < range_count; i++) {
         if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
            flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
      }
      cmd_buffer->state.flush_bits |= flush_bits;
   }
}
2086
2087 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2088 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2089 const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
2090 {
2091 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2092 VK_FROM_HANDLE(radv_image, image, image_h);
2093 struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
2094 struct radv_meta_saved_state saved_state;
2095 bool cs;
2096
2097 cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(device, image);
2098
2099 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2100 */
2101 enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2102 if (cs)
2103 save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2104 else
2105 save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2106
2107 radv_meta_save(&saved_state, cmd_buffer, save_flags);
2108
2109 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs);
2110
2111 radv_meta_restore(&saved_state, cmd_buffer);
2112 }
2113
2114 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2115 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2116 const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2117 const VkImageSubresourceRange *pRanges)
2118 {
2119 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2120 VK_FROM_HANDLE(radv_image, image, image_h);
2121 struct radv_meta_saved_state saved_state;
2122
2123 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2124 radv_meta_save(&saved_state, cmd_buffer,
2125 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2126
2127 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges,
2128 false);
2129
2130 radv_meta_restore(&saved_state, cmd_buffer);
2131 }
2132
2133 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2134 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
2135 uint32_t rectCount, const VkClearRect *pRects)
2136 {
2137 VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2138 struct radv_meta_saved_state saved_state;
2139 enum radv_cmd_flush_bits pre_flush = 0;
2140 enum radv_cmd_flush_bits post_flush = 0;
2141
2142 if (!cmd_buffer->state.render.active)
2143 return;
2144
2145 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2146
2147 /* FINISHME: We can do better than this dumb loop. It thrashes too much
2148 * state.
2149 */
2150 for (uint32_t a = 0; a < attachmentCount; ++a) {
2151 for (uint32_t r = 0; r < rectCount; ++r) {
2152 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2153 cmd_buffer->state.render.view_mask);
2154 }
2155 }
2156
2157 radv_meta_restore(&saved_state, cmd_buffer);
2158 cmd_buffer->state.flush_bits |= post_flush;
2159 }
2160