1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "nir/nir_builder.h"
25 #include "radv_debug.h"
26 #include "radv_meta.h"
27 #include "radv_private.h"
28
29 #include "util/format_rgb9e5.h"
30 #include "vk_common_entrypoints.h"
31 #include "vk_format.h"
32
33 enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST };
34
35 static void
build_color_shaders(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,uint32_t frag_output)36 build_color_shaders(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
37 uint32_t frag_output)
38 {
39 nir_builder vs_b = radv_meta_init_shader(dev, MESA_SHADER_VERTEX, "meta_clear_color_vs");
40 nir_builder fs_b = radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT, "meta_clear_color_fs-%d", frag_output);
41
42 const struct glsl_type *position_type = glsl_vec4_type();
43 const struct glsl_type *color_type = glsl_vec4_type();
44
45 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");
46 vs_out_pos->data.location = VARYING_SLOT_POS;
47
48 nir_def *in_color_load = nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);
49
50 nir_variable *fs_out_color = nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");
51 fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;
52
53 nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);
54
55 nir_def *outvec = nir_gen_rect_vertices(&vs_b, NULL, NULL);
56 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
57
58 const struct glsl_type *layer_type = glsl_int_type();
59 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
60 vs_out_layer->data.location = VARYING_SLOT_LAYER;
61 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
62 nir_def *inst_id = nir_load_instance_id(&vs_b);
63 nir_def *base_instance = nir_load_base_instance(&vs_b);
64
65 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
66 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
67
68 *out_vs = vs_b.shader;
69 *out_fs = fs_b.shader;
70 }
71
72 static VkResult
create_pipeline(struct radv_device * device,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineRenderingCreateInfo * dyn_state,const VkPipelineLayout layout,const struct radv_graphics_pipeline_create_info * extra,const VkAllocationCallbacks * alloc,VkPipeline * pipeline)73 create_pipeline(struct radv_device *device, uint32_t samples, struct nir_shader *vs_nir, struct nir_shader *fs_nir,
74 const VkPipelineVertexInputStateCreateInfo *vi_state,
75 const VkPipelineDepthStencilStateCreateInfo *ds_state,
76 const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineRenderingCreateInfo *dyn_state,
77 const VkPipelineLayout layout, const struct radv_graphics_pipeline_create_info *extra,
78 const VkAllocationCallbacks *alloc, VkPipeline *pipeline)
79 {
80 VkDevice device_h = radv_device_to_handle(device);
81 VkResult result;
82
83 result = radv_graphics_pipeline_create(device_h, device->meta_state.cache,
84 &(VkGraphicsPipelineCreateInfo){
85 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
86 .pNext = dyn_state,
87 .stageCount = fs_nir ? 2 : 1,
88 .pStages =
89 (VkPipelineShaderStageCreateInfo[]){
90 {
91 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
92 .stage = VK_SHADER_STAGE_VERTEX_BIT,
93 .module = vk_shader_module_handle_from_nir(vs_nir),
94 .pName = "main",
95 },
96 {
97 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
98 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
99 .module = vk_shader_module_handle_from_nir(fs_nir),
100 .pName = "main",
101 },
102 },
103 .pVertexInputState = vi_state,
104 .pInputAssemblyState =
105 &(VkPipelineInputAssemblyStateCreateInfo){
106 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
107 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
108 .primitiveRestartEnable = false,
109 },
110 .pViewportState =
111 &(VkPipelineViewportStateCreateInfo){
112 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
113 .viewportCount = 1,
114 .scissorCount = 1,
115 },
116 .pRasterizationState =
117 &(VkPipelineRasterizationStateCreateInfo){
118 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
119 .rasterizerDiscardEnable = false,
120 .polygonMode = VK_POLYGON_MODE_FILL,
121 .cullMode = VK_CULL_MODE_NONE,
122 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
123 .depthBiasEnable = false,
124 .depthBiasConstantFactor = 0.0f,
125 .depthBiasClamp = 0.0f,
126 .depthBiasSlopeFactor = 0.0f,
127 .lineWidth = 1.0f,
128 },
129 .pMultisampleState =
130 &(VkPipelineMultisampleStateCreateInfo){
131 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
132 .rasterizationSamples = samples,
133 .sampleShadingEnable = false,
134 .pSampleMask = NULL,
135 .alphaToCoverageEnable = false,
136 .alphaToOneEnable = false,
137 },
138 .pDepthStencilState = ds_state,
139 .pColorBlendState = cb_state,
140 .pDynamicState =
141 &(VkPipelineDynamicStateCreateInfo){
142 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
143 .dynamicStateCount = 3,
144 .pDynamicStates =
145 (VkDynamicState[]){
146 VK_DYNAMIC_STATE_VIEWPORT,
147 VK_DYNAMIC_STATE_SCISSOR,
148 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
149 },
150 },
151 .layout = layout,
152 .flags = 0,
153 .renderPass = VK_NULL_HANDLE,
154 .subpass = 0,
155 },
156 extra, alloc, pipeline);
157
158 ralloc_free(vs_nir);
159 ralloc_free(fs_nir);
160
161 return result;
162 }
163
164 static VkResult
create_color_pipeline(struct radv_device * device,uint32_t samples,uint32_t frag_output,VkFormat format,VkPipeline * pipeline)165 create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output, VkFormat format,
166 VkPipeline *pipeline)
167 {
168 struct nir_shader *vs_nir;
169 struct nir_shader *fs_nir;
170 VkResult result;
171
172 mtx_lock(&device->meta_state.mtx);
173 if (*pipeline) {
174 mtx_unlock(&device->meta_state.mtx);
175 return VK_SUCCESS;
176 }
177
178 build_color_shaders(device, &vs_nir, &fs_nir, frag_output);
179
180 const VkPipelineVertexInputStateCreateInfo vi_state = {
181 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
182 .vertexBindingDescriptionCount = 0,
183 .vertexAttributeDescriptionCount = 0,
184 };
185
186 const VkPipelineDepthStencilStateCreateInfo ds_state = {
187 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
188 .depthTestEnable = false,
189 .depthWriteEnable = false,
190 .depthBoundsTestEnable = false,
191 .stencilTestEnable = false,
192 .minDepthBounds = 0.0f,
193 .maxDepthBounds = 1.0f,
194 };
195
196 VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};
197 blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){
198 .blendEnable = false,
199 .colorWriteMask =
200 VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,
201 };
202
203 const VkPipelineColorBlendStateCreateInfo cb_state = {
204 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
205 .logicOpEnable = false,
206 .attachmentCount = MAX_RTS,
207 .pAttachments = blend_attachment_state,
208 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f}};
209
210 VkFormat att_formats[MAX_RTS] = {0};
211 att_formats[frag_output] = format;
212
213 const VkPipelineRenderingCreateInfo rendering_create_info = {
214 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
215 .colorAttachmentCount = MAX_RTS,
216 .pColorAttachmentFormats = att_formats,
217 };
218
219 struct radv_graphics_pipeline_create_info extra = {
220 .use_rectlist = true,
221 };
222 result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
223 device->meta_state.clear_color_p_layout, &extra, &device->meta_state.alloc, pipeline);
224
225 mtx_unlock(&device->meta_state.mtx);
226 return result;
227 }
228
229 static void
finish_meta_clear_htile_mask_state(struct radv_device * device)230 finish_meta_clear_htile_mask_state(struct radv_device *device)
231 {
232 struct radv_meta_state *state = &device->meta_state;
233
234 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline, &state->alloc);
235 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout, &state->alloc);
236 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
237 state->clear_htile_mask_ds_layout, &state->alloc);
238 }
239
240 static void
finish_meta_clear_dcc_comp_to_single_state(struct radv_device * device)241 finish_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
242 {
243 struct radv_meta_state *state = &device->meta_state;
244
245 for (uint32_t i = 0; i < 2; i++) {
246 radv_DestroyPipeline(radv_device_to_handle(device), state->clear_dcc_comp_to_single_pipeline[i], &state->alloc);
247 }
248 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_dcc_comp_to_single_p_layout, &state->alloc);
249 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
250 state->clear_dcc_comp_to_single_ds_layout, &state->alloc);
251 }
252
253 void
radv_device_finish_meta_clear_state(struct radv_device * device)254 radv_device_finish_meta_clear_state(struct radv_device *device)
255 {
256 struct radv_meta_state *state = &device->meta_state;
257
258 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
259 for (uint32_t j = 0; j < ARRAY_SIZE(state->color_clear[0]); ++j) {
260 for (uint32_t k = 0; k < ARRAY_SIZE(state->color_clear[i][j].color_pipelines); ++k) {
261 radv_DestroyPipeline(radv_device_to_handle(device), state->color_clear[i][j].color_pipelines[k],
262 &state->alloc);
263 }
264 }
265 }
266 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
267 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
268 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_pipeline[j], &state->alloc);
269 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_pipeline[j],
270 &state->alloc);
271 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_pipeline[j],
272 &state->alloc);
273
274 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depth_only_unrestricted_pipeline[j],
275 &state->alloc);
276 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].stencil_only_unrestricted_pipeline[j],
277 &state->alloc);
278 radv_DestroyPipeline(radv_device_to_handle(device), state->ds_clear[i].depthstencil_unrestricted_pipeline[j],
279 &state->alloc);
280 }
281 }
282 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout, &state->alloc);
283 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout, &state->alloc);
284 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_unrestricted_p_layout, &state->alloc);
285
286 finish_meta_clear_htile_mask_state(device);
287 finish_meta_clear_dcc_comp_to_single_state(device);
288 }
289
290 static void
emit_color_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,uint32_t view_mask)291 emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
292 uint32_t view_mask)
293 {
294 struct radv_device *device = cmd_buffer->device;
295 const struct radv_rendering_state *render = &cmd_buffer->state.render;
296 uint32_t samples, samples_log2;
297 VkFormat format;
298 unsigned fs_key;
299 VkClearColorValue clear_value = clear_att->clearValue.color;
300 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
301 VkPipeline pipeline;
302
303 assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
304 assert(clear_att->colorAttachment < render->color_att_count);
305 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
306
307 /* When a framebuffer is bound to the current command buffer, get the
308 * number of samples from it. Otherwise, get the number of samples from
309 * the render pass because it's likely a secondary command buffer.
310 */
311 if (color_att->iview) {
312 samples = color_att->iview->image->vk.samples;
313 format = color_att->iview->vk.format;
314 } else {
315 samples = render->max_samples;
316 format = color_att->format;
317 }
318 assert(format != VK_FORMAT_UNDEFINED);
319
320 assert(util_is_power_of_two_nonzero(samples));
321 samples_log2 = ffs(samples) - 1;
322 fs_key = radv_format_meta_fs_key(device, format);
323 assert(fs_key != -1);
324
325 if (device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key] ==
326 VK_NULL_HANDLE) {
327 VkResult ret = create_color_pipeline(
328 device, samples, clear_att->colorAttachment, radv_fs_key_format_exemplars[fs_key],
329 &device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key]);
330 if (ret != VK_SUCCESS) {
331 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
332 return;
333 }
334 }
335
336 pipeline = device->meta_state.color_clear[samples_log2][clear_att->colorAttachment].color_pipelines[fs_key];
337
338 assert(samples_log2 < ARRAY_SIZE(device->meta_state.color_clear));
339 assert(pipeline);
340
341 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_color_p_layout,
342 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16, &clear_value);
343
344 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
345
346 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
347 &(VkViewport){.x = clear_rect->rect.offset.x,
348 .y = clear_rect->rect.offset.y,
349 .width = clear_rect->rect.extent.width,
350 .height = clear_rect->rect.extent.height,
351 .minDepth = 0.0f,
352 .maxDepth = 1.0f});
353
354 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
355
356 if (view_mask) {
357 u_foreach_bit (i, view_mask)
358 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
359 } else {
360 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
361 }
362 }
363
364 static void
build_depthstencil_shader(struct radv_device * dev,struct nir_shader ** out_vs,struct nir_shader ** out_fs,bool unrestricted)365 build_depthstencil_shader(struct radv_device *dev, struct nir_shader **out_vs, struct nir_shader **out_fs,
366 bool unrestricted)
367 {
368 nir_builder vs_b = radv_meta_init_shader(
369 dev, MESA_SHADER_VERTEX, unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");
370 nir_builder fs_b =
371 radv_meta_init_shader(dev, MESA_SHADER_FRAGMENT,
372 unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");
373
374 const struct glsl_type *position_out_type = glsl_vec4_type();
375
376 nir_variable *vs_out_pos = nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");
377 vs_out_pos->data.location = VARYING_SLOT_POS;
378
379 nir_def *z;
380 if (unrestricted) {
381 nir_def *in_color_load = nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);
382
383 nir_variable *fs_out_depth = nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");
384 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
385 nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);
386
387 z = nir_imm_float(&vs_b, 0.0);
388 } else {
389 z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);
390 }
391
392 nir_def *outvec = nir_gen_rect_vertices(&vs_b, z, NULL);
393 nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);
394
395 const struct glsl_type *layer_type = glsl_int_type();
396 nir_variable *vs_out_layer = nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");
397 vs_out_layer->data.location = VARYING_SLOT_LAYER;
398 vs_out_layer->data.interpolation = INTERP_MODE_FLAT;
399 nir_def *inst_id = nir_load_instance_id(&vs_b);
400 nir_def *base_instance = nir_load_base_instance(&vs_b);
401
402 nir_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);
403 nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);
404
405 *out_vs = vs_b.shader;
406 *out_fs = fs_b.shader;
407 }
408
409 static VkResult
create_depthstencil_pipeline(struct radv_device * device,VkImageAspectFlags aspects,uint32_t samples,int index,bool unrestricted,VkPipeline * pipeline)410 create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects, uint32_t samples, int index,
411 bool unrestricted, VkPipeline *pipeline)
412 {
413 struct nir_shader *vs_nir, *fs_nir;
414 VkResult result;
415
416 mtx_lock(&device->meta_state.mtx);
417 if (*pipeline) {
418 mtx_unlock(&device->meta_state.mtx);
419 return VK_SUCCESS;
420 }
421
422 build_depthstencil_shader(device, &vs_nir, &fs_nir, unrestricted);
423
424 const VkPipelineVertexInputStateCreateInfo vi_state = {
425 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
426 .vertexBindingDescriptionCount = 0,
427 .vertexAttributeDescriptionCount = 0,
428 };
429
430 const VkPipelineDepthStencilStateCreateInfo ds_state = {
431 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
432 .depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
433 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
434 .depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),
435 .depthBoundsTestEnable = false,
436 .stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),
437 .front =
438 {
439 .passOp = VK_STENCIL_OP_REPLACE,
440 .compareOp = VK_COMPARE_OP_ALWAYS,
441 .writeMask = UINT32_MAX,
442 .reference = 0, /* dynamic */
443 },
444 .back = {0 /* dont care */},
445 .minDepthBounds = 0.0f,
446 .maxDepthBounds = 1.0f,
447 };
448
449 const VkPipelineColorBlendStateCreateInfo cb_state = {
450 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
451 .logicOpEnable = false,
452 .attachmentCount = 0,
453 .pAttachments = NULL,
454 .blendConstants = {0.0f, 0.0f, 0.0f, 0.0f},
455 };
456
457 const VkPipelineRenderingCreateInfo rendering_create_info = {
458 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
459 .depthAttachmentFormat = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? VK_FORMAT_D32_SFLOAT : VK_FORMAT_UNDEFINED,
460 .stencilAttachmentFormat = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? VK_FORMAT_S8_UINT : VK_FORMAT_UNDEFINED,
461 };
462
463 struct radv_graphics_pipeline_create_info extra = {
464 .use_rectlist = true,
465 };
466
467 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
468 extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;
469 }
470 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
471 extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;
472 }
473 result = create_pipeline(device, samples, vs_nir, fs_nir, &vi_state, &ds_state, &cb_state, &rendering_create_info,
474 device->meta_state.clear_depth_p_layout, &extra, &device->meta_state.alloc, pipeline);
475
476 mtx_unlock(&device->meta_state.mtx);
477 return result;
478 }
479
480 static bool radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
481 VkImageLayout image_layout, VkImageAspectFlags aspects,
482 const VkClearRect *clear_rect, const VkClearDepthStencilValue clear_value,
483 uint32_t view_mask);
484
485 static VkPipeline
pick_depthstencil_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_state * meta_state,int samples_log2,VkImageAspectFlags aspects,bool fast)486 pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state, int samples_log2,
487 VkImageAspectFlags aspects, bool fast)
488 {
489 bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
490 int index = fast ? DEPTH_CLEAR_FAST : DEPTH_CLEAR_SLOW;
491 VkPipeline *pipeline;
492
493 switch (aspects) {
494 case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
495 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depthstencil_unrestricted_pipeline[index]
496 : &meta_state->ds_clear[samples_log2].depthstencil_pipeline[index];
497 break;
498 case VK_IMAGE_ASPECT_DEPTH_BIT:
499 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].depth_only_unrestricted_pipeline[index]
500 : &meta_state->ds_clear[samples_log2].depth_only_pipeline[index];
501 break;
502 case VK_IMAGE_ASPECT_STENCIL_BIT:
503 pipeline = unrestricted ? &meta_state->ds_clear[samples_log2].stencil_only_unrestricted_pipeline[index]
504 : &meta_state->ds_clear[samples_log2].stencil_only_pipeline[index];
505 break;
506 default:
507 unreachable("expected depth or stencil aspect");
508 }
509
510 if (*pipeline == VK_NULL_HANDLE) {
511 VkResult ret =
512 create_depthstencil_pipeline(cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline);
513 if (ret != VK_SUCCESS) {
514 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
515 return VK_NULL_HANDLE;
516 }
517 }
518 return *pipeline;
519 }
520
521 static void
emit_depthstencil_clear(struct radv_cmd_buffer * cmd_buffer,VkClearDepthStencilValue clear_value,VkImageAspectFlags aspects,const VkClearRect * clear_rect,uint32_t view_mask,bool can_fast_clear)522 emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, VkClearDepthStencilValue clear_value,
523 VkImageAspectFlags aspects, const VkClearRect *clear_rect, uint32_t view_mask,
524 bool can_fast_clear)
525 {
526 struct radv_device *device = cmd_buffer->device;
527 struct radv_meta_state *meta_state = &device->meta_state;
528 const struct radv_rendering_state *render = &cmd_buffer->state.render;
529 uint32_t samples, samples_log2;
530 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
531
532 /* When a framebuffer is bound to the current command buffer, get the
533 * number of samples from it. Otherwise, get the number of samples from
534 * the render pass because it's likely a secondary command buffer.
535 */
536 struct radv_image_view *iview = render->ds_att.iview;
537 if (iview) {
538 samples = iview->image->vk.samples;
539 } else {
540 assert(render->ds_att.format != VK_FORMAT_UNDEFINED);
541 samples = render->max_samples;
542 }
543
544 assert(util_is_power_of_two_nonzero(samples));
545 samples_log2 = ffs(samples) - 1;
546
547 if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))
548 clear_value.depth = 1.0f;
549
550 if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {
551 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
552 device->meta_state.clear_depth_unrestricted_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,
553 4, &clear_value.depth);
554 } else {
555 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.clear_depth_p_layout,
556 VK_SHADER_STAGE_VERTEX_BIT, 0, 4, &clear_value.depth);
557 }
558
559 uint32_t prev_reference = cmd_buffer->state.dynamic.vk.ds.stencil.front.reference;
560 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
561 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);
562 }
563
564 VkPipeline pipeline = pick_depthstencil_pipeline(cmd_buffer, meta_state, samples_log2, aspects, can_fast_clear);
565 if (!pipeline)
566 return;
567
568 radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
569
570 if (can_fast_clear)
571 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
572
573 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
574 &(VkViewport){.x = clear_rect->rect.offset.x,
575 .y = clear_rect->rect.offset.y,
576 .width = clear_rect->rect.extent.width,
577 .height = clear_rect->rect.extent.height,
578 .minDepth = 0.0f,
579 .maxDepth = 1.0f});
580
581 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);
582
583 if (view_mask) {
584 u_foreach_bit (i, view_mask)
585 radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);
586 } else {
587 radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);
588 }
589
590 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
591 radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);
592 }
593 }
594
595 static uint32_t
clear_htile_mask(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,struct radeon_winsys_bo * bo,uint64_t offset,uint64_t size,uint32_t htile_value,uint32_t htile_mask)596 clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image, struct radeon_winsys_bo *bo,
597 uint64_t offset, uint64_t size, uint32_t htile_value, uint32_t htile_mask)
598 {
599 struct radv_device *device = cmd_buffer->device;
600 struct radv_meta_state *state = &device->meta_state;
601 uint64_t block_count = DIV_ROUND_UP(size, 1024);
602 struct radv_meta_saved_state saved_state;
603 struct radv_buffer dst_buffer;
604
605 radv_meta_save(&saved_state, cmd_buffer,
606 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
607
608 radv_buffer_init(&dst_buffer, device, bo, size, offset);
609
610 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
611 state->clear_htile_mask_pipeline);
612
613 radv_meta_push_descriptor_set(
614 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */
615 1, /* descriptorWriteCount */
616 (VkWriteDescriptorSet[]){
617 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
618 .dstBinding = 0,
619 .dstArrayElement = 0,
620 .descriptorCount = 1,
621 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
622 .pBufferInfo =
623 &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer), .offset = 0, .range = size}}});
624
625 const unsigned constants[2] = {
626 htile_value & htile_mask,
627 ~htile_mask,
628 };
629
630 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,
631 VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);
632
633 vk_common_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);
634
635 radv_buffer_finish(&dst_buffer);
636
637 radv_meta_restore(&saved_state, cmd_buffer);
638
639 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
640 }
641
642 static uint32_t
radv_get_htile_fast_clear_value(const struct radv_device * device,const struct radv_image * image,VkClearDepthStencilValue value)643 radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,
644 VkClearDepthStencilValue value)
645 {
646 uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */
647 uint32_t zmask = 0, smem = 0;
648 uint32_t htile_value;
649 uint32_t zmin, zmax;
650
651 /* Convert the depth value to 14-bit zmin/zmax values. */
652 zmin = lroundf(value.depth * max_zval);
653 zmax = zmin;
654
655 if (radv_image_tile_stencil_disabled(device, image)) {
656 /* Z only (no stencil):
657 *
658 * |31 18|17 4|3 0|
659 * +---------+---------+-------+
660 * | Max Z | Min Z | ZMask |
661 */
662 htile_value = (((zmax & 0x3fff) << 18) | ((zmin & 0x3fff) << 4) | ((zmask & 0xf) << 0));
663 } else {
664
665 /* Z and stencil:
666 *
667 * |31 12|11 10|9 8|7 6|5 4|3 0|
668 * +-----------+-----+------+-----+-----+-------+
669 * | Z Range | | SMem | SR1 | SR0 | ZMask |
670 *
671 * Z, stencil, 4 bit VRS encoding:
672 * |31 12| 11 10 |9 8|7 6 |5 4|3 0|
673 * +-----------+------------+------+------------+-----+-------+
674 * | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |
675 */
676 uint32_t delta = 0;
677 uint32_t zrange = ((zmax << 6) | delta);
678 uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */
679
680 if (radv_image_has_vrs_htile(device, image))
681 sresults = 0x3;
682
683 htile_value = (((zrange & 0xfffff) << 12) | ((smem & 0x3) << 8) | ((sresults & 0xf) << 4) | ((zmask & 0xf) << 0));
684 }
685
686 return htile_value;
687 }
688
689 static uint32_t
radv_get_htile_mask(const struct radv_device * device,const struct radv_image * image,VkImageAspectFlags aspects)690 radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image, VkImageAspectFlags aspects)
691 {
692 uint32_t mask = 0;
693
694 if (radv_image_tile_stencil_disabled(device, image)) {
695 /* All the HTILE buffer is used when there is no stencil. */
696 mask = UINT32_MAX;
697 } else {
698 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
699 mask |= 0xfffffc0f;
700 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
701 mask |= 0x000003f0;
702 }
703
704 return mask;
705 }
706
707 static bool
radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)708 radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)
709 {
710 return value.depth == 1.0f || value.depth == 0.0f;
711 }
712
713 static bool
radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)714 radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)
715 {
716 return value.stencil == 0;
717 }
718
719 static bool
radv_can_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,VkImageAspectFlags aspects,const VkClearRect * clear_rect,const VkClearDepthStencilValue clear_value,uint32_t view_mask)720 radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
721 VkImageLayout image_layout, VkImageAspectFlags aspects, const VkClearRect *clear_rect,
722 const VkClearDepthStencilValue clear_value, uint32_t view_mask)
723 {
724 if (!iview || !iview->support_fast_clear)
725 return false;
726
727 if (!radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, image_layout,
728 radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
729 return false;
730
731 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
732 clear_rect->rect.extent.width != iview->image->vk.extent.width ||
733 clear_rect->rect.extent.height != iview->image->vk.extent.height)
734 return false;
735
736 if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
737 return false;
738 if (!view_mask && clear_rect->baseArrayLayer != 0)
739 return false;
740 if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
741 return false;
742
743 if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted &&
744 (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && (clear_value.depth < 0.0 || clear_value.depth > 1.0))
745 return false;
746
747 if (radv_image_is_tc_compat_htile(iview->image) &&
748 (((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||
749 ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && !radv_is_fast_clear_stencil_allowed(clear_value))))
750 return false;
751
752 if (iview->image->vk.mip_levels > 1) {
753 uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
754 if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
755 /* Do not fast clears if one level can't be fast cleared. */
756 return false;
757 }
758 }
759
760 return true;
761 }
762
763 static void
radv_fast_clear_depth(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkClearDepthStencilValue clear_value,VkImageAspectFlags aspects,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)764 radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
765 VkClearDepthStencilValue clear_value, VkImageAspectFlags aspects,
766 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
767 {
768 uint32_t clear_word, flush_bits;
769
770 clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);
771
772 if (pre_flush) {
773 enum radv_cmd_flush_bits bits =
774 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, iview->image) |
775 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT | VK_ACCESS_2_SHADER_READ_BIT, iview->image);
776 cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
777 *pre_flush |= cmd_buffer->state.flush_bits;
778 }
779
780 VkImageSubresourceRange range = {
781 .aspectMask = aspects,
782 .baseMipLevel = iview->vk.base_mip_level,
783 .levelCount = iview->vk.level_count,
784 .baseArrayLayer = iview->vk.base_array_layer,
785 .layerCount = iview->vk.layer_count,
786 };
787
788 flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);
789
790 if (iview->image->planes[0].surface.has_stencil &&
791 !(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
792 /* Synchronize after performing a depth-only or a stencil-only
793 * fast clear because the driver uses an optimized path which
794 * performs a read-modify-write operation, and the two separate
795 * aspects might use the same HTILE memory.
796 */
797 cmd_buffer->state.flush_bits |= flush_bits;
798 }
799
800 radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);
801 if (post_flush) {
802 *post_flush |= flush_bits;
803 }
804 }
805
806 static nir_shader *
build_clear_htile_mask_shader(struct radv_device * dev)807 build_clear_htile_mask_shader(struct radv_device *dev)
808 {
809 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_htile_mask");
810 b.shader->info.workgroup_size[0] = 64;
811
812 nir_def *global_id = get_global_ids(&b, 1);
813
814 nir_def *offset = nir_imul_imm(&b, global_id, 16);
815 offset = nir_channel(&b, offset, 0);
816
817 nir_def *buf = radv_meta_load_descriptor(&b, 0, 0);
818
819 nir_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
820
821 nir_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);
822
823 /* data = (data & ~htile_mask) | (htile_value & htile_mask) */
824 nir_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));
825 data = nir_ior(&b, data, nir_channel(&b, constants, 0));
826
827 nir_store_ssbo(&b, data, buf, offset, .access = ACCESS_NON_READABLE, .align_mul = 16);
828
829 return b.shader;
830 }
831
832 static VkResult
init_meta_clear_htile_mask_state(struct radv_device * device)833 init_meta_clear_htile_mask_state(struct radv_device *device)
834 {
835 struct radv_meta_state *state = &device->meta_state;
836 VkResult result;
837 nir_shader *cs = build_clear_htile_mask_shader(device);
838
839 VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
840 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
841 .bindingCount = 1,
842 .pBindings = (VkDescriptorSetLayoutBinding[]){
843 {.binding = 0,
844 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
845 .descriptorCount = 1,
846 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
847 .pImmutableSamplers = NULL},
848 }};
849
850 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc,
851 &state->clear_htile_mask_ds_layout);
852 if (result != VK_SUCCESS)
853 goto fail;
854
855 VkPipelineLayoutCreateInfo p_layout_info = {
856 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
857 .setLayoutCount = 1,
858 .pSetLayouts = &state->clear_htile_mask_ds_layout,
859 .pushConstantRangeCount = 1,
860 .pPushConstantRanges =
861 &(VkPushConstantRange){
862 VK_SHADER_STAGE_COMPUTE_BIT,
863 0,
864 8,
865 },
866 };
867
868 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
869 &state->clear_htile_mask_p_layout);
870 if (result != VK_SUCCESS)
871 goto fail;
872
873 VkPipelineShaderStageCreateInfo shader_stage = {
874 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
875 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
876 .module = vk_shader_module_handle_from_nir(cs),
877 .pName = "main",
878 .pSpecializationInfo = NULL,
879 };
880
881 VkComputePipelineCreateInfo pipeline_info = {
882 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
883 .stage = shader_stage,
884 .flags = 0,
885 .layout = state->clear_htile_mask_p_layout,
886 };
887
888 result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL,
889 &state->clear_htile_mask_pipeline);
890
891 fail:
892 ralloc_free(cs);
893 return result;
894 }
895
896 /* Clear DCC using comp-to-single by storing the clear value at the beginning of every 256B block.
897 * For MSAA images, clearing the first sample should be enough as long as CMASK is also cleared.
898 */
899 static nir_shader *
build_clear_dcc_comp_to_single_shader(struct radv_device * dev,bool is_msaa)900 build_clear_dcc_comp_to_single_shader(struct radv_device *dev, bool is_msaa)
901 {
902 enum glsl_sampler_dim dim = is_msaa ? GLSL_SAMPLER_DIM_MS : GLSL_SAMPLER_DIM_2D;
903 const struct glsl_type *img_type = glsl_image_type(dim, true, GLSL_TYPE_FLOAT);
904
905 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_clear_dcc_comp_to_single-%s",
906 is_msaa ? "multisampled" : "singlesampled");
907 b.shader->info.workgroup_size[0] = 8;
908 b.shader->info.workgroup_size[1] = 8;
909
910 nir_def *global_id = get_global_ids(&b, 3);
911
912 /* Load the dimensions in pixels of a block that gets compressed to one DCC byte. */
913 nir_def *dcc_block_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
914
915 /* Compute the coordinates. */
916 nir_def *coord = nir_trim_vector(&b, global_id, 2);
917 coord = nir_imul(&b, coord, dcc_block_size);
918 coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), nir_channel(&b, global_id, 2),
919 nir_undef(&b, 1, 32));
920
921 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
922 output_img->data.descriptor_set = 0;
923 output_img->data.binding = 0;
924
925 /* Load the clear color values. */
926 nir_def *clear_values = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 8), .range = 24);
927
928 nir_def *data = nir_vec4(&b, nir_channel(&b, clear_values, 0), nir_channel(&b, clear_values, 1),
929 nir_channel(&b, clear_values, 2), nir_channel(&b, clear_values, 3));
930
931 /* Store the clear color values. */
932 nir_def *sample_id = is_msaa ? nir_imm_int(&b, 0) : nir_undef(&b, 1, 32);
933 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, sample_id, data, nir_imm_int(&b, 0),
934 .image_dim = dim, .image_array = true);
935
936 return b.shader;
937 }
938
939 static VkResult
create_dcc_comp_to_single_pipeline(struct radv_device * device,bool is_msaa,VkPipeline * pipeline)940 create_dcc_comp_to_single_pipeline(struct radv_device *device, bool is_msaa, VkPipeline *pipeline)
941 {
942 struct radv_meta_state *state = &device->meta_state;
943 VkResult result;
944 nir_shader *cs = build_clear_dcc_comp_to_single_shader(device, is_msaa);
945
946 VkPipelineShaderStageCreateInfo shader_stage = {
947 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
948 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
949 .module = vk_shader_module_handle_from_nir(cs),
950 .pName = "main",
951 .pSpecializationInfo = NULL,
952 };
953
954 VkComputePipelineCreateInfo pipeline_info = {
955 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
956 .stage = shader_stage,
957 .flags = 0,
958 .layout = state->clear_dcc_comp_to_single_p_layout,
959 };
960
961 result = radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &pipeline_info, NULL, pipeline);
962
963 ralloc_free(cs);
964 return result;
965 }
966
967 static VkResult
init_meta_clear_dcc_comp_to_single_state(struct radv_device * device)968 init_meta_clear_dcc_comp_to_single_state(struct radv_device *device)
969 {
970 struct radv_meta_state *state = &device->meta_state;
971 VkResult result;
972
973 VkDescriptorSetLayoutCreateInfo ds_layout_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
974 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
975 .bindingCount = 1,
976 .pBindings = (VkDescriptorSetLayoutBinding[]){
977 {.binding = 0,
978 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
979 .descriptorCount = 1,
980 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
981 .pImmutableSamplers = NULL},
982 }};
983
984 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info, &state->alloc,
985 &state->clear_dcc_comp_to_single_ds_layout);
986 if (result != VK_SUCCESS)
987 goto fail;
988
989 VkPipelineLayoutCreateInfo p_layout_info = {
990 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
991 .setLayoutCount = 1,
992 .pSetLayouts = &state->clear_dcc_comp_to_single_ds_layout,
993 .pushConstantRangeCount = 1,
994 .pPushConstantRanges =
995 &(VkPushConstantRange){
996 VK_SHADER_STAGE_COMPUTE_BIT,
997 0,
998 24,
999 },
1000 };
1001
1002 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,
1003 &state->clear_dcc_comp_to_single_p_layout);
1004 if (result != VK_SUCCESS)
1005 goto fail;
1006
1007 for (uint32_t i = 0; i < 2; i++) {
1008 result = create_dcc_comp_to_single_pipeline(device, !!i, &state->clear_dcc_comp_to_single_pipeline[i]);
1009 if (result != VK_SUCCESS)
1010 goto fail;
1011 }
1012
1013 fail:
1014 return result;
1015 }
1016
1017 VkResult
radv_device_init_meta_clear_state(struct radv_device * device,bool on_demand)1018 radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)
1019 {
1020 VkResult res;
1021 struct radv_meta_state *state = &device->meta_state;
1022
1023 VkPipelineLayoutCreateInfo pl_color_create_info = {
1024 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1025 .setLayoutCount = 0,
1026 .pushConstantRangeCount = 1,
1027 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},
1028 };
1029
1030 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info, &device->meta_state.alloc,
1031 &device->meta_state.clear_color_p_layout);
1032 if (res != VK_SUCCESS)
1033 return res;
1034
1035 VkPipelineLayoutCreateInfo pl_depth_create_info = {
1036 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1037 .setLayoutCount = 0,
1038 .pushConstantRangeCount = 1,
1039 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},
1040 };
1041
1042 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info, &device->meta_state.alloc,
1043 &device->meta_state.clear_depth_p_layout);
1044 if (res != VK_SUCCESS)
1045 return res;
1046
1047 VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {
1048 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1049 .setLayoutCount = 0,
1050 .pushConstantRangeCount = 1,
1051 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},
1052 };
1053
1054 res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_unrestricted_create_info,
1055 &device->meta_state.alloc, &device->meta_state.clear_depth_unrestricted_p_layout);
1056 if (res != VK_SUCCESS)
1057 return res;
1058
1059 res = init_meta_clear_htile_mask_state(device);
1060 if (res != VK_SUCCESS)
1061 return res;
1062
1063 res = init_meta_clear_dcc_comp_to_single_state(device);
1064 if (res != VK_SUCCESS)
1065 return res;
1066
1067 if (on_demand)
1068 return VK_SUCCESS;
1069
1070 for (uint32_t i = 0; i < ARRAY_SIZE(state->color_clear); ++i) {
1071 uint32_t samples = 1 << i;
1072
1073 /* Only precompile meta pipelines for attachment 0 as other are uncommon. */
1074 for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {
1075 VkFormat format = radv_fs_key_format_exemplars[j];
1076 unsigned fs_key = radv_format_meta_fs_key(device, format);
1077 assert(!state->color_clear[i][0].color_pipelines[fs_key]);
1078
1079 res = create_color_pipeline(device, samples, 0, format, &state->color_clear[i][0].color_pipelines[fs_key]);
1080 if (res != VK_SUCCESS)
1081 return res;
1082 }
1083 }
1084 for (uint32_t i = 0; i < ARRAY_SIZE(state->ds_clear); ++i) {
1085 uint32_t samples = 1 << i;
1086
1087 for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {
1088 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,
1089 &state->ds_clear[i].depth_only_pipeline[j]);
1090 if (res != VK_SUCCESS)
1091 return res;
1092
1093 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,
1094 &state->ds_clear[i].stencil_only_pipeline[j]);
1095 if (res != VK_SUCCESS)
1096 return res;
1097
1098 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
1099 false, &state->ds_clear[i].depthstencil_pipeline[j]);
1100 if (res != VK_SUCCESS)
1101 return res;
1102
1103 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,
1104 &state->ds_clear[i].depth_only_unrestricted_pipeline[j]);
1105 if (res != VK_SUCCESS)
1106 return res;
1107
1108 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,
1109 &state->ds_clear[i].stencil_only_unrestricted_pipeline[j]);
1110 if (res != VK_SUCCESS)
1111 return res;
1112
1113 res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j,
1114 true, &state->ds_clear[i].depthstencil_unrestricted_pipeline[j]);
1115 if (res != VK_SUCCESS)
1116 return res;
1117 }
1118 }
1119 return VK_SUCCESS;
1120 }
1121
1122 static uint32_t
radv_get_cmask_fast_clear_value(const struct radv_image * image)1123 radv_get_cmask_fast_clear_value(const struct radv_image *image)
1124 {
1125 uint32_t value = 0; /* Default value when no DCC. */
1126
1127 /* The fast-clear value is different for images that have both DCC and
1128 * CMASK metadata.
1129 */
1130 if (radv_image_has_dcc(image)) {
1131 /* DCC fast clear with MSAA should clear CMASK to 0xC. */
1132 return image->vk.samples > 1 ? 0xcccccccc : 0xffffffff;
1133 }
1134
1135 return value;
1136 }
1137
1138 uint32_t
radv_clear_cmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1139 radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1140 uint32_t value)
1141 {
1142 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.cmask_offset;
1143 uint64_t size;
1144
1145 if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1146 /* TODO: clear layers. */
1147 size = image->planes[0].surface.cmask_size;
1148 } else {
1149 unsigned slice_size = image->planes[0].surface.cmask_slice_size;
1150
1151 offset += slice_size * range->baseArrayLayer;
1152 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1153 }
1154
1155 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset,
1156 size, value);
1157 }
1158
1159 uint32_t
radv_clear_fmask(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1160 radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1161 uint32_t value)
1162 {
1163 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.fmask_offset;
1164 unsigned slice_size = image->planes[0].surface.fmask_slice_size;
1165 uint64_t size;
1166
1167 /* MSAA images do not support mipmap levels. */
1168 assert(range->baseMipLevel == 0 && vk_image_subresource_level_count(&image->vk, range) == 1);
1169
1170 offset += slice_size * range->baseArrayLayer;
1171 size = slice_size * vk_image_subresource_layer_count(&image->vk, range);
1172
1173 return radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo, radv_buffer_get_va(image->bindings[0].bo) + offset,
1174 size, value);
1175 }
1176
1177 uint32_t
radv_clear_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1178 radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, const VkImageSubresourceRange *range,
1179 uint32_t value)
1180 {
1181 uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
1182 uint32_t layer_count = vk_image_subresource_layer_count(&image->vk, range);
1183 uint32_t flush_bits = 0;
1184
1185 /* Mark the image as being compressed. */
1186 radv_update_dcc_metadata(cmd_buffer, image, range, true);
1187
1188 for (uint32_t l = 0; l < level_count; l++) {
1189 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset;
1190 uint32_t level = range->baseMipLevel + l;
1191 uint64_t size;
1192
1193 if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
1194 /* DCC for mipmaps+layers is currently disabled. */
1195 offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +
1196 image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1197 size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;
1198 } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
1199 /* Mipmap levels and layers aren't implemented. */
1200 assert(level == 0);
1201 size = image->planes[0].surface.meta_size;
1202 } else {
1203 const struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[level];
1204
1205 /* If dcc_fast_clear_size is 0 (which might happens for
1206 * mipmaps) the fill buffer operation below is a no-op.
1207 * This can only happen during initialization as the
1208 * fast clear path fallbacks to slow clears if one
1209 * level can't be fast cleared.
1210 */
1211 offset += dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;
1212 size = dcc_level->dcc_slice_fast_clear_size * vk_image_subresource_layer_count(&image->vk, range);
1213 }
1214
1215 /* Do not clear this level if it can't be compressed. */
1216 if (!size)
1217 continue;
1218
1219 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1220 radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1221 }
1222
1223 return flush_bits;
1224 }
1225
1226 static uint32_t
radv_clear_dcc_comp_to_single(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,uint32_t color_values[4])1227 radv_clear_dcc_comp_to_single(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
1228 const VkImageSubresourceRange *range, uint32_t color_values[4])
1229 {
1230 struct radv_device *device = cmd_buffer->device;
1231 unsigned bytes_per_pixel = vk_format_get_blocksize(image->vk.format);
1232 unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
1233 struct radv_meta_saved_state saved_state;
1234 bool is_msaa = image->vk.samples > 1;
1235 struct radv_image_view iview;
1236 VkFormat format;
1237
1238 switch (bytes_per_pixel) {
1239 case 1:
1240 format = VK_FORMAT_R8_UINT;
1241 break;
1242 case 2:
1243 format = VK_FORMAT_R16_UINT;
1244 break;
1245 case 4:
1246 format = VK_FORMAT_R32_UINT;
1247 break;
1248 case 8:
1249 format = VK_FORMAT_R32G32_UINT;
1250 break;
1251 case 16:
1252 format = VK_FORMAT_R32G32B32A32_UINT;
1253 break;
1254 default:
1255 unreachable("Unsupported number of bytes per pixel");
1256 }
1257
1258 radv_meta_save(&saved_state, cmd_buffer,
1259 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
1260
1261 VkPipeline pipeline = device->meta_state.clear_dcc_comp_to_single_pipeline[is_msaa];
1262
1263 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1264
1265 for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); l++) {
1266 uint32_t width, height;
1267
1268 /* Do not write the clear color value for levels without DCC. */
1269 if (!radv_dcc_enabled(image, range->baseMipLevel + l))
1270 continue;
1271
1272 width = radv_minify(image->vk.extent.width, range->baseMipLevel + l);
1273 height = radv_minify(image->vk.extent.height, range->baseMipLevel + l);
1274
1275 radv_image_view_init(&iview, cmd_buffer->device,
1276 &(VkImageViewCreateInfo){
1277 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1278 .image = radv_image_to_handle(image),
1279 .viewType = VK_IMAGE_VIEW_TYPE_2D,
1280 .format = format,
1281 .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1282 .baseMipLevel = range->baseMipLevel + l,
1283 .levelCount = 1,
1284 .baseArrayLayer = range->baseArrayLayer,
1285 .layerCount = layer_count},
1286 },
1287 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
1288
1289 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1290 device->meta_state.clear_dcc_comp_to_single_p_layout, 0, 1,
1291 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1292 .dstBinding = 0,
1293 .dstArrayElement = 0,
1294 .descriptorCount = 1,
1295 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1296 .pImageInfo = (VkDescriptorImageInfo[]){
1297 {
1298 .sampler = VK_NULL_HANDLE,
1299 .imageView = radv_image_view_to_handle(&iview),
1300 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1301 },
1302 }}});
1303
1304 unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
1305 unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
1306
1307 const unsigned constants[6] = {
1308 image->planes[0].surface.u.gfx9.color.dcc_block_width,
1309 image->planes[0].surface.u.gfx9.color.dcc_block_height,
1310 color_values[0],
1311 color_values[1],
1312 color_values[2],
1313 color_values[3],
1314 };
1315
1316 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1317 device->meta_state.clear_dcc_comp_to_single_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1318 24, constants);
1319
1320 radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, layer_count);
1321
1322 radv_image_view_finish(&iview);
1323 }
1324
1325 radv_meta_restore(&saved_state, cmd_buffer);
1326
1327 return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
1328 }
1329
1330 uint32_t
radv_clear_htile(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * image,const VkImageSubresourceRange * range,uint32_t value)1331 radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
1332 const VkImageSubresourceRange *range, uint32_t value)
1333 {
1334 uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
1335 uint32_t flush_bits = 0;
1336 uint32_t htile_mask;
1337
1338 htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);
1339
1340 if (level_count != image->vk.mip_levels) {
1341 assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
1342
1343 /* Clear individuals levels separately. */
1344 for (uint32_t l = 0; l < level_count; l++) {
1345 uint32_t level = range->baseMipLevel + l;
1346 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1347 image->planes[0].surface.u.gfx9.meta_levels[level].offset;
1348 uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;
1349
1350 /* Do not clear this level if it can be compressed. */
1351 if (!size)
1352 continue;
1353
1354 if (htile_mask == UINT_MAX) {
1355 /* Clear the whole HTILE buffer. */
1356 flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1357 radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1358 } else {
1359 /* Only clear depth or stencil bytes in the HTILE buffer. */
1360 flush_bits |= clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1361 }
1362 }
1363 } else {
1364 unsigned layer_count = vk_image_subresource_layer_count(&image->vk, range);
1365 uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;
1366 uint64_t offset = image->bindings[0].offset + image->planes[0].surface.meta_offset +
1367 image->planes[0].surface.meta_slice_size * range->baseArrayLayer;
1368
1369 if (htile_mask == UINT_MAX) {
1370 /* Clear the whole HTILE buffer. */
1371 flush_bits = radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
1372 radv_buffer_get_va(image->bindings[0].bo) + offset, size, value);
1373 } else {
1374 /* Only clear depth or stencil bytes in the HTILE buffer. */
1375 flush_bits = clear_htile_mask(cmd_buffer, image, image->bindings[0].bo, offset, size, value, htile_mask);
1376 }
1377 }
1378
1379 return flush_bits;
1380 }
1381
1382 enum {
1383 RADV_DCC_CLEAR_0000 = 0x00000000U,
1384 RADV_DCC_GFX8_CLEAR_0001 = 0x40404040U,
1385 RADV_DCC_GFX8_CLEAR_1110 = 0x80808080U,
1386 RADV_DCC_GFX8_CLEAR_1111 = 0xC0C0C0C0U,
1387 RADV_DCC_GFX8_CLEAR_REG = 0x20202020U,
1388 RADV_DCC_GFX9_CLEAR_SINGLE = 0x10101010U,
1389 RADV_DCC_GFX11_CLEAR_SINGLE = 0x01010101U,
1390 RADV_DCC_GFX11_CLEAR_0000 = 0x00000000U,
1391 RADV_DCC_GFX11_CLEAR_1111_UNORM = 0x02020202U,
1392 RADV_DCC_GFX11_CLEAR_1111_FP16 = 0x04040404U,
1393 RADV_DCC_GFX11_CLEAR_1111_FP32 = 0x06060606U,
1394 RADV_DCC_GFX11_CLEAR_0001_UNORM = 0x08080808U,
1395 RADV_DCC_GFX11_CLEAR_1110_UNORM = 0x0A0A0A0AU,
1396 };
1397
1398 static uint32_t
radv_dcc_single_clear_value(const struct radv_device * device)1399 radv_dcc_single_clear_value(const struct radv_device *device)
1400 {
1401 return device->physical_device->rad_info.gfx_level >= GFX11 ? RADV_DCC_GFX11_CLEAR_SINGLE
1402 : RADV_DCC_GFX9_CLEAR_SINGLE;
1403 }
1404
1405 static void
gfx8_get_fast_clear_parameters(struct radv_device * device,const struct radv_image_view * iview,const VkClearColorValue * clear_value,uint32_t * reset_value,bool * can_avoid_fast_clear_elim)1406 gfx8_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
1407 const VkClearColorValue *clear_value, uint32_t *reset_value,
1408 bool *can_avoid_fast_clear_elim)
1409 {
1410 bool values[4] = {0};
1411 int extra_channel;
1412 bool main_value = false;
1413 bool extra_value = false;
1414 bool has_color = false;
1415 bool has_alpha = false;
1416
1417 /* comp-to-single allows to perform DCC fast clears without requiring a FCE. */
1418 if (iview->image->support_comp_to_single) {
1419 *reset_value = RADV_DCC_GFX9_CLEAR_SINGLE;
1420 *can_avoid_fast_clear_elim = true;
1421 } else {
1422 *reset_value = RADV_DCC_GFX8_CLEAR_REG;
1423 *can_avoid_fast_clear_elim = false;
1424 }
1425
1426 const struct util_format_description *desc = vk_format_description(iview->vk.format);
1427 if (iview->vk.format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 || iview->vk.format == VK_FORMAT_R5G6B5_UNORM_PACK16 ||
1428 iview->vk.format == VK_FORMAT_B5G6R5_UNORM_PACK16)
1429 extra_channel = -1;
1430 else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1431 if (vi_alpha_is_on_msb(device, iview->vk.format))
1432 extra_channel = desc->nr_channels - 1;
1433 else
1434 extra_channel = 0;
1435 } else
1436 return;
1437
1438 for (int i = 0; i < 4; i++) {
1439 int index = desc->swizzle[i] - PIPE_SWIZZLE_X;
1440 if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)
1441 continue;
1442
1443 if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {
1444 /* Use the maximum value for clamping the clear color. */
1445 int max = u_bit_consecutive(0, desc->channel[i].size - 1);
1446
1447 values[i] = clear_value->int32[i] != 0;
1448 if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)
1449 return;
1450 } else if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {
1451 /* Use the maximum value for clamping the clear color. */
1452 unsigned max = u_bit_consecutive(0, desc->channel[i].size);
1453
1454 values[i] = clear_value->uint32[i] != 0U;
1455 if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)
1456 return;
1457 } else {
1458 values[i] = clear_value->float32[i] != 0.0F;
1459 if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)
1460 return;
1461 }
1462
1463 if (index == extra_channel) {
1464 extra_value = values[i];
1465 has_alpha = true;
1466 } else {
1467 main_value = values[i];
1468 has_color = true;
1469 }
1470 }
1471
1472 /* If alpha isn't present, make it the same as color, and vice versa. */
1473 if (!has_alpha)
1474 extra_value = main_value;
1475 else if (!has_color)
1476 main_value = extra_value;
1477
1478 for (int i = 0; i < 4; ++i)
1479 if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&
1480 desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)
1481 return;
1482
1483 /* Only DCC clear code 0000 is allowed for signed<->unsigned formats. */
1484 if ((main_value || extra_value) && iview->image->dcc_sign_reinterpret)
1485 return;
1486
1487 *can_avoid_fast_clear_elim = true;
1488
1489 if (main_value) {
1490 if (extra_value)
1491 *reset_value = RADV_DCC_GFX8_CLEAR_1111;
1492 else
1493 *reset_value = RADV_DCC_GFX8_CLEAR_1110;
1494 } else {
1495 if (extra_value)
1496 *reset_value = RADV_DCC_GFX8_CLEAR_0001;
1497 else
1498 *reset_value = RADV_DCC_CLEAR_0000;
1499 }
1500 }
1501
1502 static bool
gfx11_get_fast_clear_parameters(struct radv_device * device,const struct radv_image_view * iview,const VkClearColorValue * clear_value,uint32_t * reset_value)1503 gfx11_get_fast_clear_parameters(struct radv_device *device, const struct radv_image_view *iview,
1504 const VkClearColorValue *clear_value, uint32_t *reset_value)
1505 {
1506 const struct util_format_description *desc = vk_format_description(iview->vk.format);
1507 unsigned start_bit = UINT_MAX;
1508 unsigned end_bit = 0;
1509
1510 /* TODO: 8bpp and 16bpp fast DCC clears don't work. */
1511 if (desc->block.bits <= 16)
1512 return false;
1513
1514 /* Find the used bit range. */
1515 for (unsigned i = 0; i < 4; i++) {
1516 unsigned swizzle = desc->swizzle[i];
1517
1518 if (swizzle >= PIPE_SWIZZLE_0)
1519 continue;
1520
1521 start_bit = MIN2(start_bit, desc->channel[swizzle].shift);
1522 end_bit = MAX2(end_bit, desc->channel[swizzle].shift + desc->channel[swizzle].size);
1523 }
1524
1525 union {
1526 uint8_t ub[16];
1527 uint16_t us[8];
1528 uint32_t ui[4];
1529 } value;
1530 memset(&value, 0, sizeof(value));
1531 util_format_pack_rgba(vk_format_to_pipe_format(iview->vk.format), &value, clear_value, 1);
1532
1533 /* Check the cases where all components or bits are either all 0 or all 1. */
1534 bool all_bits_are_0 = true;
1535 bool all_bits_are_1 = true;
1536 bool all_words_are_fp16_1 = false;
1537 bool all_words_are_fp32_1 = false;
1538
1539 for (unsigned i = start_bit; i < end_bit; i++) {
1540 bool bit = value.ub[i / 8] & BITFIELD_BIT(i % 8);
1541
1542 all_bits_are_0 &= !bit;
1543 all_bits_are_1 &= bit;
1544 }
1545
1546 if (start_bit % 16 == 0 && end_bit % 16 == 0) {
1547 all_words_are_fp16_1 = true;
1548 for (unsigned i = start_bit / 16; i < end_bit / 16; i++)
1549 all_words_are_fp16_1 &= value.us[i] == 0x3c00;
1550 }
1551
1552 if (start_bit % 32 == 0 && end_bit % 32 == 0) {
1553 all_words_are_fp32_1 = true;
1554 for (unsigned i = start_bit / 32; i < end_bit / 32; i++)
1555 all_words_are_fp32_1 &= value.ui[i] == 0x3f800000;
1556 }
1557
1558 if (all_bits_are_0 || all_bits_are_1 || all_words_are_fp16_1 || all_words_are_fp32_1) {
1559 if (all_bits_are_0)
1560 *reset_value = RADV_DCC_CLEAR_0000;
1561 else if (all_bits_are_1)
1562 *reset_value = RADV_DCC_GFX11_CLEAR_1111_UNORM;
1563 else if (all_words_are_fp16_1)
1564 *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP16;
1565 else if (all_words_are_fp32_1)
1566 *reset_value = RADV_DCC_GFX11_CLEAR_1111_FP32;
1567 return true;
1568 }
1569
1570 if (desc->nr_channels == 2 && desc->channel[0].size == 8) {
1571 if (value.ub[0] == 0x00 && value.ub[1] == 0xff) {
1572 *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
1573 return true;
1574 } else if (value.ub[0] == 0xff && value.ub[1] == 0x00) {
1575 *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
1576 return true;
1577 }
1578 } else if (desc->nr_channels == 4 && desc->channel[0].size == 8) {
1579 if (value.ub[0] == 0x00 && value.ub[1] == 0x00 && value.ub[2] == 0x00 && value.ub[3] == 0xff) {
1580 *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
1581 return true;
1582 } else if (value.ub[0] == 0xff && value.ub[1] == 0xff && value.ub[2] == 0xff && value.ub[3] == 0x00) {
1583 *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
1584 return true;
1585 }
1586 } else if (desc->nr_channels == 4 && desc->channel[0].size == 16) {
1587 if (value.us[0] == 0x0000 && value.us[1] == 0x0000 && value.us[2] == 0x0000 && value.us[3] == 0xffff) {
1588 *reset_value = RADV_DCC_GFX11_CLEAR_0001_UNORM;
1589 return true;
1590 } else if (value.us[0] == 0xffff && value.us[1] == 0xffff && value.us[2] == 0xffff && value.us[3] == 0x0000) {
1591 *reset_value = RADV_DCC_GFX11_CLEAR_1110_UNORM;
1592 return true;
1593 }
1594 }
1595
1596 if (iview->image->support_comp_to_single) {
1597 *reset_value = RADV_DCC_GFX11_CLEAR_SINGLE;
1598 return true;
1599 }
1600
1601 return false;
1602 }
1603
1604 static bool
radv_can_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,VkImageLayout image_layout,const VkClearRect * clear_rect,VkClearColorValue clear_value,uint32_t view_mask)1605 radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
1606 VkImageLayout image_layout, const VkClearRect *clear_rect, VkClearColorValue clear_value,
1607 uint32_t view_mask)
1608 {
1609 uint32_t clear_color[2];
1610
1611 if (!iview || !iview->support_fast_clear)
1612 return false;
1613
1614 if (!radv_layout_can_fast_clear(cmd_buffer->device, iview->image, iview->vk.base_mip_level, image_layout,
1615 radv_image_queue_family_mask(iview->image, cmd_buffer->qf, cmd_buffer->qf)))
1616 return false;
1617
1618 if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||
1619 clear_rect->rect.extent.width != iview->image->vk.extent.width ||
1620 clear_rect->rect.extent.height != iview->image->vk.extent.height)
1621 return false;
1622
1623 if (view_mask && (iview->image->vk.array_layers >= 32 || (1u << iview->image->vk.array_layers) - 1u != view_mask))
1624 return false;
1625 if (!view_mask && clear_rect->baseArrayLayer != 0)
1626 return false;
1627 if (!view_mask && clear_rect->layerCount != iview->image->vk.array_layers)
1628 return false;
1629
1630 /* DCC */
1631
1632 /* Images that support comp-to-single clears don't have clear values. */
1633 if (!iview->image->support_comp_to_single) {
1634 if (!radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value))
1635 return false;
1636
1637 if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))
1638 return false;
1639 }
1640
1641 if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
1642 bool can_avoid_fast_clear_elim;
1643 uint32_t reset_value;
1644
1645 if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1646 if (!gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value))
1647 return false;
1648 } else {
1649 gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
1650 &can_avoid_fast_clear_elim);
1651 }
1652
1653 if (iview->image->vk.mip_levels > 1) {
1654 if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
1655 uint32_t last_level = iview->vk.base_mip_level + iview->vk.level_count - 1;
1656 if (last_level >= iview->image->planes[0].surface.num_meta_levels) {
1657 /* Do not fast clears if one level can't be fast cleard. */
1658 return false;
1659 }
1660 } else {
1661 for (uint32_t l = 0; l < iview->vk.level_count; l++) {
1662 uint32_t level = iview->vk.base_mip_level + l;
1663 struct legacy_surf_dcc_level *dcc_level =
1664 &iview->image->planes[0].surface.u.legacy.color.dcc_level[level];
1665
1666 /* Do not fast clears if one level can't be
1667 * fast cleared.
1668 */
1669 if (!dcc_level->dcc_fast_clear_size)
1670 return false;
1671 }
1672 }
1673 }
1674 }
1675
1676 return true;
1677 }
1678
1679 static void
radv_fast_clear_color(struct radv_cmd_buffer * cmd_buffer,const struct radv_image_view * iview,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1680 radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,
1681 const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,
1682 enum radv_cmd_flush_bits *post_flush)
1683 {
1684 VkClearColorValue clear_value = clear_att->clearValue.color;
1685 uint32_t clear_color[4], flush_bits = 0;
1686 uint32_t cmask_clear_value;
1687 VkImageSubresourceRange range = {
1688 .aspectMask = iview->vk.aspects,
1689 .baseMipLevel = iview->vk.base_mip_level,
1690 .levelCount = iview->vk.level_count,
1691 .baseArrayLayer = iview->vk.base_array_layer,
1692 .layerCount = iview->vk.layer_count,
1693 };
1694
1695 if (pre_flush) {
1696 enum radv_cmd_flush_bits bits =
1697 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |
1698 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, iview->image);
1699 cmd_buffer->state.flush_bits |= bits & ~*pre_flush;
1700 *pre_flush |= cmd_buffer->state.flush_bits;
1701 }
1702
1703 /* DCC */
1704 radv_format_pack_clear_color(iview->vk.format, clear_color, &clear_value);
1705
1706 cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);
1707
1708 /* clear cmask buffer */
1709 bool need_decompress_pass = false;
1710 if (radv_dcc_enabled(iview->image, iview->vk.base_mip_level)) {
1711 uint32_t reset_value;
1712 bool can_avoid_fast_clear_elim = true;
1713
1714 if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1715 ASSERTED bool result = gfx11_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value);
1716 assert(result);
1717 } else {
1718 gfx8_get_fast_clear_parameters(cmd_buffer->device, iview, &clear_value, &reset_value,
1719 &can_avoid_fast_clear_elim);
1720 }
1721
1722 if (radv_image_has_cmask(iview->image)) {
1723 flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
1724 }
1725
1726 if (!can_avoid_fast_clear_elim)
1727 need_decompress_pass = true;
1728
1729 flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);
1730
1731 if (reset_value == radv_dcc_single_clear_value(cmd_buffer->device)) {
1732 /* Write the clear color to the first byte of each 256B block when the image supports DCC
1733 * fast clears with comp-to-single.
1734 */
1735 if (vk_format_get_blocksize(iview->image->vk.format) == 16) {
1736 flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_value.uint32);
1737 } else {
1738 clear_color[2] = clear_color[3] = 0;
1739 flush_bits |= radv_clear_dcc_comp_to_single(cmd_buffer, iview->image, &range, clear_color);
1740 }
1741 }
1742 } else {
1743 flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);
1744
1745 /* Fast clearing with CMASK should always be eliminated. */
1746 need_decompress_pass = true;
1747 }
1748
1749 if (post_flush) {
1750 *post_flush |= flush_bits;
1751 }
1752
1753 /* Update the FCE predicate to perform a fast-clear eliminate. */
1754 radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);
1755
1756 radv_update_color_clear_metadata(cmd_buffer, iview, clear_att->colorAttachment, clear_color);
1757 }
1758
1759 /**
1760 * The parameters mean that same as those in vkCmdClearAttachments.
1761 */
1762 static void
emit_clear(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,const VkClearRect * clear_rect,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush,uint32_t view_mask)1763 emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att, const VkClearRect *clear_rect,
1764 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush, uint32_t view_mask)
1765 {
1766 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1767 VkImageAspectFlags aspects = clear_att->aspectMask;
1768
1769 if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1770 assert(clear_att->colorAttachment < render->color_att_count);
1771 const struct radv_attachment *color_att = &render->color_att[clear_att->colorAttachment];
1772
1773 if (color_att->format == VK_FORMAT_UNDEFINED)
1774 return;
1775
1776 VkClearColorValue clear_value = clear_att->clearValue.color;
1777
1778 if (radv_can_fast_clear_color(cmd_buffer, color_att->iview, color_att->layout, clear_rect, clear_value,
1779 view_mask)) {
1780 radv_fast_clear_color(cmd_buffer, color_att->iview, clear_att, pre_flush, post_flush);
1781 } else {
1782 emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);
1783 }
1784 } else {
1785 const struct radv_attachment *ds_att = &render->ds_att;
1786
1787 if (ds_att->format == VK_FORMAT_UNDEFINED)
1788 return;
1789
1790 VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;
1791
1792 assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));
1793 bool can_fast_clear_depth = false;
1794 bool can_fast_clear_stencil = false;
1795 if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT) &&
1796 ds_att->layout != ds_att->stencil_layout) {
1797 can_fast_clear_depth = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->layout, aspects,
1798 clear_rect, clear_value, view_mask);
1799 can_fast_clear_stencil = radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, ds_att->stencil_layout, aspects,
1800 clear_rect, clear_value, view_mask);
1801 } else {
1802 VkImageLayout layout = aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? ds_att->layout : ds_att->stencil_layout;
1803 can_fast_clear_depth =
1804 radv_can_fast_clear_depth(cmd_buffer, ds_att->iview, layout, aspects, clear_rect, clear_value, view_mask);
1805 can_fast_clear_stencil = can_fast_clear_depth;
1806 }
1807
1808 if (can_fast_clear_depth && can_fast_clear_stencil) {
1809 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil, clear_att->aspectMask,
1810 pre_flush, post_flush);
1811 } else if (!can_fast_clear_depth && !can_fast_clear_stencil) {
1812 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, clear_att->aspectMask, clear_rect,
1813 view_mask, false);
1814 } else {
1815 if (can_fast_clear_depth) {
1816 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1817 VK_IMAGE_ASPECT_DEPTH_BIT, pre_flush, post_flush);
1818 } else {
1819 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_DEPTH_BIT,
1820 clear_rect, view_mask, can_fast_clear_depth);
1821 }
1822
1823 if (can_fast_clear_stencil) {
1824 radv_fast_clear_depth(cmd_buffer, ds_att->iview, clear_att->clearValue.depthStencil,
1825 VK_IMAGE_ASPECT_STENCIL_BIT, pre_flush, post_flush);
1826 } else {
1827 emit_depthstencil_clear(cmd_buffer, clear_att->clearValue.depthStencil, VK_IMAGE_ASPECT_STENCIL_BIT,
1828 clear_rect, view_mask, can_fast_clear_stencil);
1829 }
1830 }
1831 }
1832 }
1833
1834 static bool
radv_rendering_needs_clear(const VkRenderingInfo * pRenderingInfo)1835 radv_rendering_needs_clear(const VkRenderingInfo *pRenderingInfo)
1836 {
1837 for (uint32_t i = 0; i < pRenderingInfo->colorAttachmentCount; i++) {
1838 if (pRenderingInfo->pColorAttachments[i].imageView != VK_NULL_HANDLE &&
1839 pRenderingInfo->pColorAttachments[i].loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1840 return true;
1841 }
1842
1843 if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1844 pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1845 return true;
1846
1847 if (pRenderingInfo->pStencilAttachment != NULL && pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1848 pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR)
1849 return true;
1850
1851 return false;
1852 }
1853
1854 static void
radv_subpass_clear_attachment(struct radv_cmd_buffer * cmd_buffer,const VkClearAttachment * clear_att,enum radv_cmd_flush_bits * pre_flush,enum radv_cmd_flush_bits * post_flush)1855 radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,
1856 enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)
1857 {
1858 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1859
1860 VkClearRect clear_rect = {
1861 .rect = render->area,
1862 .baseArrayLayer = 0,
1863 .layerCount = render->layer_count,
1864 };
1865
1866 radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);
1867
1868 emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush, render->view_mask);
1869
1870 radv_describe_end_render_pass_clear(cmd_buffer);
1871 }
1872
1873 /**
1874 * Emit any pending attachment clears for the current subpass.
1875 *
1876 * @see radv_attachment_state::pending_clear_aspects
1877 */
1878 void
radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer * cmd_buffer,const VkRenderingInfo * pRenderingInfo)1879 radv_cmd_buffer_clear_rendering(struct radv_cmd_buffer *cmd_buffer, const VkRenderingInfo *pRenderingInfo)
1880 {
1881 const struct radv_rendering_state *render = &cmd_buffer->state.render;
1882 struct radv_meta_saved_state saved_state;
1883 enum radv_cmd_flush_bits pre_flush = 0;
1884 enum radv_cmd_flush_bits post_flush = 0;
1885
1886 if (!radv_rendering_needs_clear(pRenderingInfo))
1887 return;
1888
1889 /* Subpass clear should not be affected by conditional rendering. */
1890 radv_meta_save(&saved_state, cmd_buffer,
1891 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
1892
1893 assert(render->color_att_count == pRenderingInfo->colorAttachmentCount);
1894 for (uint32_t i = 0; i < render->color_att_count; i++) {
1895 if (render->color_att[i].iview == NULL ||
1896 pRenderingInfo->pColorAttachments[i].loadOp != VK_ATTACHMENT_LOAD_OP_CLEAR)
1897 continue;
1898
1899 VkClearAttachment clear_att = {
1900 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
1901 .colorAttachment = i,
1902 .clearValue = pRenderingInfo->pColorAttachments[i].clearValue,
1903 };
1904
1905 radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
1906 }
1907
1908 if (render->ds_att.iview != NULL) {
1909 VkClearAttachment clear_att = {.aspectMask = 0};
1910
1911 if (pRenderingInfo->pDepthAttachment != NULL && pRenderingInfo->pDepthAttachment->imageView != VK_NULL_HANDLE &&
1912 pRenderingInfo->pDepthAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1913 clear_att.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
1914 clear_att.clearValue.depthStencil.depth = pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
1915 }
1916
1917 if (pRenderingInfo->pStencilAttachment != NULL &&
1918 pRenderingInfo->pStencilAttachment->imageView != VK_NULL_HANDLE &&
1919 pRenderingInfo->pStencilAttachment->loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {
1920 clear_att.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
1921 clear_att.clearValue.depthStencil.stencil =
1922 pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
1923 }
1924
1925 if (clear_att.aspectMask != 0) {
1926 radv_subpass_clear_attachment(cmd_buffer, &clear_att, &pre_flush, &post_flush);
1927 }
1928 }
1929
1930 radv_meta_restore(&saved_state, cmd_buffer);
1931 cmd_buffer->state.flush_bits |= post_flush;
1932 }
1933
1934 static void
radv_clear_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkImageSubresourceRange * range,VkFormat format,int level,unsigned layer_count,const VkClearValue * clear_val)1935 radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
1936 const VkImageSubresourceRange *range, VkFormat format, int level, unsigned layer_count,
1937 const VkClearValue *clear_val)
1938 {
1939 struct radv_image_view iview;
1940 uint32_t width = radv_minify(image->vk.extent.width, range->baseMipLevel + level);
1941 uint32_t height = radv_minify(image->vk.extent.height, range->baseMipLevel + level);
1942
1943 radv_image_view_init(&iview, cmd_buffer->device,
1944 &(VkImageViewCreateInfo){
1945 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1946 .image = radv_image_to_handle(image),
1947 .viewType = radv_meta_get_view_type(image),
1948 .format = format,
1949 .subresourceRange = {.aspectMask = range->aspectMask,
1950 .baseMipLevel = range->baseMipLevel + level,
1951 .levelCount = 1,
1952 .baseArrayLayer = range->baseArrayLayer,
1953 .layerCount = layer_count},
1954 },
1955 0, NULL);
1956
1957 VkClearAttachment clear_att = {
1958 .aspectMask = range->aspectMask,
1959 .colorAttachment = 0,
1960 .clearValue = *clear_val,
1961 };
1962
1963 VkClearRect clear_rect = {
1964 .rect =
1965 {
1966 .offset = {0, 0},
1967 .extent = {width, height},
1968 },
1969 .baseArrayLayer = 0,
1970 .layerCount = layer_count,
1971 };
1972
1973 VkRenderingAttachmentInfo att = {
1974 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
1975 .imageView = radv_image_view_to_handle(&iview),
1976 .imageLayout = image_layout,
1977 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
1978 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
1979 };
1980
1981 VkRenderingInfo rendering_info = {
1982 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
1983 .renderArea =
1984 {
1985 .offset = {0, 0},
1986 .extent = {width, height},
1987 },
1988 .layerCount = layer_count,
1989 };
1990
1991 if (image->vk.aspects & VK_IMAGE_ASPECT_COLOR_BIT) {
1992 rendering_info.colorAttachmentCount = 1;
1993 rendering_info.pColorAttachments = &att;
1994 }
1995 if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
1996 rendering_info.pDepthAttachment = &att;
1997 if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT)
1998 rendering_info.pStencilAttachment = &att;
1999
2000 radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
2001
2002 emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0);
2003
2004 radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
2005
2006 radv_image_view_finish(&iview);
2007 }
2008
2009 /**
2010 * Return TRUE if a fast color or depth clear has been performed.
2011 */
2012 static bool
radv_fast_clear_range(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkFormat format,VkImageLayout image_layout,const VkImageSubresourceRange * range,const VkClearValue * clear_val)2013 radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,
2014 VkImageLayout image_layout, const VkImageSubresourceRange *range, const VkClearValue *clear_val)
2015 {
2016 struct radv_image_view iview;
2017 bool fast_cleared = false;
2018
2019 radv_image_view_init(&iview, cmd_buffer->device,
2020 &(VkImageViewCreateInfo){
2021 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2022 .image = radv_image_to_handle(image),
2023 .viewType = radv_meta_get_view_type(image),
2024 .format = image->vk.format,
2025 .subresourceRange =
2026 {
2027 .aspectMask = range->aspectMask,
2028 .baseMipLevel = range->baseMipLevel,
2029 .levelCount = vk_image_subresource_level_count(&image->vk, range),
2030 .baseArrayLayer = range->baseArrayLayer,
2031 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2032 },
2033 },
2034 0, NULL);
2035
2036 VkClearRect clear_rect = {
2037 .rect =
2038 {
2039 .offset = {0, 0},
2040 .extent =
2041 {
2042 radv_minify(image->vk.extent.width, range->baseMipLevel),
2043 radv_minify(image->vk.extent.height, range->baseMipLevel),
2044 },
2045 },
2046 .baseArrayLayer = range->baseArrayLayer,
2047 .layerCount = vk_image_subresource_layer_count(&image->vk, range),
2048 };
2049
2050 VkClearAttachment clear_att = {
2051 .aspectMask = range->aspectMask,
2052 .colorAttachment = 0,
2053 .clearValue = *clear_val,
2054 };
2055
2056 if (vk_format_is_color(format)) {
2057 if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, &clear_rect, clear_att.clearValue.color, 0)) {
2058 radv_fast_clear_color(cmd_buffer, &iview, &clear_att, NULL, NULL);
2059 fast_cleared = true;
2060 }
2061 } else {
2062 if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, range->aspectMask, &clear_rect,
2063 clear_att.clearValue.depthStencil, 0)) {
2064 radv_fast_clear_depth(cmd_buffer, &iview, clear_att.clearValue.depthStencil, clear_att.aspectMask, NULL, NULL);
2065 fast_cleared = true;
2066 }
2067 }
2068
2069 radv_image_view_finish(&iview);
2070 return fast_cleared;
2071 }
2072
2073 static void
radv_cmd_clear_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,VkImageLayout image_layout,const VkClearValue * clear_value,uint32_t range_count,const VkImageSubresourceRange * ranges,bool cs)2074 radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkImageLayout image_layout,
2075 const VkClearValue *clear_value, uint32_t range_count, const VkImageSubresourceRange *ranges,
2076 bool cs)
2077 {
2078 VkFormat format = image->vk.format;
2079 VkClearValue internal_clear_value;
2080
2081 if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
2082 internal_clear_value.color = clear_value->color;
2083 else
2084 internal_clear_value.depthStencil = clear_value->depthStencil;
2085
2086 bool disable_compression = false;
2087
2088 if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {
2089 bool blendable;
2090 if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)
2091 : !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format, &blendable)) {
2092 format = VK_FORMAT_R32_UINT;
2093 internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);
2094
2095 uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->qf, cmd_buffer->qf);
2096
2097 for (uint32_t r = 0; r < range_count; r++) {
2098 const VkImageSubresourceRange *range = &ranges[r];
2099
2100 /* Don't use compressed image stores because they will use an incompatible format. */
2101 if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel, image_layout, queue_mask)) {
2102 disable_compression = cs;
2103 break;
2104 }
2105 }
2106 }
2107 }
2108
2109 if (format == VK_FORMAT_R4G4_UNORM_PACK8) {
2110 uint8_t r, g;
2111 format = VK_FORMAT_R8_UINT;
2112 r = float_to_ubyte(clear_value->color.float32[0]) >> 4;
2113 g = float_to_ubyte(clear_value->color.float32[1]) >> 4;
2114 internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);
2115 }
2116
2117 for (uint32_t r = 0; r < range_count; r++) {
2118 const VkImageSubresourceRange *range = &ranges[r];
2119
2120 /* Try to perform a fast clear first, otherwise fallback to
2121 * the legacy path.
2122 */
2123 if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, range, &internal_clear_value)) {
2124 continue;
2125 }
2126
2127 for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, range); ++l) {
2128 const uint32_t layer_count = image->vk.image_type == VK_IMAGE_TYPE_3D
2129 ? radv_minify(image->vk.extent.depth, range->baseMipLevel + l)
2130 : vk_image_subresource_layer_count(&image->vk, range);
2131 if (cs) {
2132 for (uint32_t s = 0; s < layer_count; ++s) {
2133 struct radv_meta_blit2d_surf surf;
2134 surf.format = format;
2135 surf.image = image;
2136 surf.level = range->baseMipLevel + l;
2137 surf.layer = range->baseArrayLayer + s;
2138 surf.aspect_mask = range->aspectMask;
2139 surf.disable_compression = disable_compression;
2140 radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);
2141 }
2142 } else {
2143 assert(!disable_compression);
2144 radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, layer_count,
2145 &internal_clear_value);
2146 }
2147 }
2148 }
2149
2150 if (disable_compression) {
2151 enum radv_cmd_flush_bits flush_bits = 0;
2152 for (unsigned i = 0; i < range_count; i++) {
2153 if (radv_dcc_enabled(image, ranges[i].baseMipLevel))
2154 flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);
2155 }
2156 cmd_buffer->state.flush_bits |= flush_bits;
2157 }
2158 }
2159
2160 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2161 radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2162 const VkClearColorValue *pColor, uint32_t rangeCount, const VkImageSubresourceRange *pRanges)
2163 {
2164 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2165 RADV_FROM_HANDLE(radv_image, image, image_h);
2166 struct radv_meta_saved_state saved_state;
2167 bool cs;
2168
2169 cs = cmd_buffer->qf == RADV_QUEUE_COMPUTE || !radv_image_is_renderable(cmd_buffer->device, image);
2170
2171 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering.
2172 */
2173 enum radv_meta_save_flags save_flags = RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING;
2174 if (cs)
2175 save_flags |= RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS;
2176 else
2177 save_flags |= RADV_META_SAVE_GRAPHICS_PIPELINE;
2178
2179 radv_meta_save(&saved_state, cmd_buffer, save_flags);
2180
2181 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount, pRanges, cs);
2182
2183 radv_meta_restore(&saved_state, cmd_buffer);
2184 }
2185
2186 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage image_h,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)2187 radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,
2188 const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,
2189 const VkImageSubresourceRange *pRanges)
2190 {
2191 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2192 RADV_FROM_HANDLE(radv_image, image, image_h);
2193 struct radv_meta_saved_state saved_state;
2194
2195 /* Clear commands (except vkCmdClearAttachments) should not be affected by conditional rendering. */
2196 radv_meta_save(&saved_state, cmd_buffer,
2197 RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SUSPEND_PREDICATING);
2198
2199 radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil, rangeCount, pRanges,
2200 false);
2201
2202 radv_meta_restore(&saved_state, cmd_buffer);
2203 }
2204
2205 VKAPI_ATTR void VKAPI_CALL
radv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)2206 radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount, const VkClearAttachment *pAttachments,
2207 uint32_t rectCount, const VkClearRect *pRects)
2208 {
2209 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
2210 struct radv_meta_saved_state saved_state;
2211 enum radv_cmd_flush_bits pre_flush = 0;
2212 enum radv_cmd_flush_bits post_flush = 0;
2213
2214 if (!cmd_buffer->state.render.active)
2215 return;
2216
2217 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);
2218
2219 /* FINISHME: We can do better than this dumb loop. It thrashes too much
2220 * state.
2221 */
2222 for (uint32_t a = 0; a < attachmentCount; ++a) {
2223 for (uint32_t r = 0; r < rectCount; ++r) {
2224 emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,
2225 cmd_buffer->state.render.view_mask);
2226 }
2227 }
2228
2229 radv_meta_restore(&saved_state, cmd_buffer);
2230 cmd_buffer->state.flush_bits |= post_flush;
2231 }
2232