/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23
#include <assert.h>
#include <stdbool.h>

#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
30
/* Color metadata decompression operations dispatched by radv_process_color_image(). */
enum radv_color_op {
   FAST_CLEAR_ELIMINATE,
   FMASK_DECOMPRESS,
   DCC_DECOMPRESS,
};
36
37 static nir_shader *
build_dcc_decompress_compute_shader(struct radv_device * dev)38 build_dcc_decompress_compute_shader(struct radv_device *dev)
39 {
40 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
41
42 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_decompress_compute");
43
44 /* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
45 b.shader->info.workgroup_size[0] = 16;
46 b.shader->info.workgroup_size[1] = 16;
47 nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
48 input_img->data.descriptor_set = 0;
49 input_img->data.binding = 0;
50
51 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
52 output_img->data.descriptor_set = 0;
53 output_img->data.binding = 1;
54
55 nir_ssa_def *global_id = get_global_ids(&b, 2);
56 nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, global_id, 0),
57 nir_channel(&b, global_id, 1),
58 nir_ssa_undef(&b, 1, 32),
59 nir_ssa_undef(&b, 1, 32));
60
61 nir_ssa_def *data = nir_image_deref_load(
62 &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, img_coord, nir_ssa_undef(&b, 1, 32),
63 nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
64
65 /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
66 * creating a vmcnt(0) because it expects the L1 cache to keep memory
67 * operations in-order for the same workgroup. The vmcnt(0) seems
68 * necessary however. */
69 nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
70 .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
71
72 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
73 nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
74 .image_dim = GLSL_SAMPLER_DIM_2D);
75 return b.shader;
76 }
77
78 static VkResult
create_dcc_compress_compute(struct radv_device * device)79 create_dcc_compress_compute(struct radv_device *device)
80 {
81 VkResult result = VK_SUCCESS;
82 nir_shader *cs = build_dcc_decompress_compute_shader(device);
83
84 VkDescriptorSetLayoutCreateInfo ds_create_info = {
85 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
86 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
87 .bindingCount = 2,
88 .pBindings = (VkDescriptorSetLayoutBinding[]){
89 {.binding = 0,
90 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
91 .descriptorCount = 1,
92 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
93 .pImmutableSamplers = NULL},
94 {.binding = 1,
95 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
96 .descriptorCount = 1,
97 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
98 .pImmutableSamplers = NULL},
99 }};
100
101 result = radv_CreateDescriptorSetLayout(
102 radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
103 &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
104 if (result != VK_SUCCESS)
105 goto cleanup;
106
107 VkPipelineLayoutCreateInfo pl_create_info = {
108 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
109 .setLayoutCount = 1,
110 .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
111 .pushConstantRangeCount = 0,
112 .pPushConstantRanges = NULL,
113 };
114
115 result = radv_CreatePipelineLayout(
116 radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
117 &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
118 if (result != VK_SUCCESS)
119 goto cleanup;
120
121 /* compute shader */
122
123 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
124 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
125 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
126 .module = vk_shader_module_handle_from_nir(cs),
127 .pName = "main",
128 .pSpecializationInfo = NULL,
129 };
130
131 VkComputePipelineCreateInfo vk_pipeline_info = {
132 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
133 .stage = pipeline_shader_stage,
134 .flags = 0,
135 .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
136 };
137
138 result = radv_CreateComputePipelines(
139 radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
140 &vk_pipeline_info, NULL,
141 &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
142 if (result != VK_SUCCESS)
143 goto cleanup;
144
145 cleanup:
146 ralloc_free(cs);
147 return result;
148 }
149
150 static VkResult
create_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout)151 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
152 {
153 VkPipelineLayoutCreateInfo pl_create_info = {
154 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
155 .setLayoutCount = 0,
156 .pSetLayouts = NULL,
157 .pushConstantRangeCount = 0,
158 .pPushConstantRanges = NULL,
159 };
160
161 return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
162 &device->meta_state.alloc, layout);
163 }
164
165 static VkResult
create_pipeline(struct radv_device * device,VkShaderModule vs_module_h,VkPipelineLayout layout)166 create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
167 {
168 VkResult result;
169 VkDevice device_h = radv_device_to_handle(device);
170
171 nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
172
173 if (!fs_module) {
174 /* XXX: Need more accurate error */
175 result = VK_ERROR_OUT_OF_HOST_MEMORY;
176 goto cleanup;
177 }
178
179 const VkPipelineShaderStageCreateInfo stages[2] = {
180 {
181 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
182 .stage = VK_SHADER_STAGE_VERTEX_BIT,
183 .module = vs_module_h,
184 .pName = "main",
185 },
186 {
187 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
188 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
189 .module = vk_shader_module_handle_from_nir(fs_module),
190 .pName = "main",
191 },
192 };
193
194 const VkPipelineVertexInputStateCreateInfo vi_state = {
195 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
196 .vertexBindingDescriptionCount = 0,
197 .vertexAttributeDescriptionCount = 0,
198 };
199
200 const VkPipelineInputAssemblyStateCreateInfo ia_state = {
201 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
202 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
203 .primitiveRestartEnable = false,
204 };
205
206 const VkPipelineColorBlendStateCreateInfo blend_state = {
207 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
208 .logicOpEnable = false,
209 .attachmentCount = 1,
210 .pAttachments = (VkPipelineColorBlendAttachmentState[]){
211 {
212 .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
213 VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
214 },
215 }};
216 const VkPipelineRasterizationStateCreateInfo rs_state = {
217 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
218 .depthClampEnable = false,
219 .rasterizerDiscardEnable = false,
220 .polygonMode = VK_POLYGON_MODE_FILL,
221 .cullMode = VK_CULL_MODE_NONE,
222 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
223 };
224
225 const VkFormat color_format = VK_FORMAT_R8_UNORM;
226 const VkPipelineRenderingCreateInfo rendering_create_info = {
227 .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
228 .colorAttachmentCount = 1,
229 .pColorAttachmentFormats = &color_format,
230 };
231
232 result = radv_graphics_pipeline_create(
233 device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
234 &(VkGraphicsPipelineCreateInfo){
235 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
236 .pNext = &rendering_create_info,
237 .stageCount = 2,
238 .pStages = stages,
239
240 .pVertexInputState = &vi_state,
241 .pInputAssemblyState = &ia_state,
242
243 .pViewportState =
244 &(VkPipelineViewportStateCreateInfo){
245 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
246 .viewportCount = 1,
247 .scissorCount = 1,
248 },
249 .pRasterizationState = &rs_state,
250 .pMultisampleState =
251 &(VkPipelineMultisampleStateCreateInfo){
252 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
253 .rasterizationSamples = 1,
254 .sampleShadingEnable = false,
255 .pSampleMask = NULL,
256 .alphaToCoverageEnable = false,
257 .alphaToOneEnable = false,
258 },
259 .pColorBlendState = &blend_state,
260 .pDynamicState =
261 &(VkPipelineDynamicStateCreateInfo){
262 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
263 .dynamicStateCount = 2,
264 .pDynamicStates =
265 (VkDynamicState[]){
266 VK_DYNAMIC_STATE_VIEWPORT,
267 VK_DYNAMIC_STATE_SCISSOR,
268 },
269 },
270 .layout = layout,
271 .renderPass = VK_NULL_HANDLE,
272 .subpass = 0,
273 },
274 &(struct radv_graphics_pipeline_create_info){
275 .use_rectlist = true,
276 .custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
277 },
278 &device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
279 if (result != VK_SUCCESS)
280 goto cleanup;
281
282 result = radv_graphics_pipeline_create(
283 device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
284 &(VkGraphicsPipelineCreateInfo){
285 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
286 .pNext = &rendering_create_info,
287 .stageCount = 2,
288 .pStages = stages,
289
290 .pVertexInputState = &vi_state,
291 .pInputAssemblyState = &ia_state,
292
293 .pViewportState =
294 &(VkPipelineViewportStateCreateInfo){
295 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
296 .viewportCount = 1,
297 .scissorCount = 1,
298 },
299 .pRasterizationState = &rs_state,
300 .pMultisampleState =
301 &(VkPipelineMultisampleStateCreateInfo){
302 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
303 .rasterizationSamples = 1,
304 .sampleShadingEnable = false,
305 .pSampleMask = NULL,
306 .alphaToCoverageEnable = false,
307 .alphaToOneEnable = false,
308 },
309 .pColorBlendState = &blend_state,
310 .pDynamicState =
311 &(VkPipelineDynamicStateCreateInfo){
312 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
313 .dynamicStateCount = 2,
314 .pDynamicStates =
315 (VkDynamicState[]){
316 VK_DYNAMIC_STATE_VIEWPORT,
317 VK_DYNAMIC_STATE_SCISSOR,
318 },
319 },
320 .layout = layout,
321 .renderPass = VK_NULL_HANDLE,
322 .subpass = 0,
323 },
324 &(struct radv_graphics_pipeline_create_info){
325 .use_rectlist = true,
326 .custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
327 },
328 &device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
329 if (result != VK_SUCCESS)
330 goto cleanup;
331
332 result = radv_graphics_pipeline_create(
333 device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
334 &(VkGraphicsPipelineCreateInfo){
335 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
336 .pNext = &rendering_create_info,
337 .stageCount = 2,
338 .pStages = stages,
339
340 .pVertexInputState = &vi_state,
341 .pInputAssemblyState = &ia_state,
342
343 .pViewportState =
344 &(VkPipelineViewportStateCreateInfo){
345 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
346 .viewportCount = 1,
347 .scissorCount = 1,
348 },
349 .pRasterizationState = &rs_state,
350 .pMultisampleState =
351 &(VkPipelineMultisampleStateCreateInfo){
352 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
353 .rasterizationSamples = 1,
354 .sampleShadingEnable = false,
355 .pSampleMask = NULL,
356 .alphaToCoverageEnable = false,
357 .alphaToOneEnable = false,
358 },
359 .pColorBlendState = &blend_state,
360 .pDynamicState =
361 &(VkPipelineDynamicStateCreateInfo){
362 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
363 .dynamicStateCount = 2,
364 .pDynamicStates =
365 (VkDynamicState[]){
366 VK_DYNAMIC_STATE_VIEWPORT,
367 VK_DYNAMIC_STATE_SCISSOR,
368 },
369 },
370 .layout = layout,
371 .renderPass = VK_NULL_HANDLE,
372 .subpass = 0,
373 },
374 &(struct radv_graphics_pipeline_create_info){
375 .use_rectlist = true,
376 .custom_blend_mode = device->physical_device->rad_info.gfx_level >= GFX11
377 ? V_028808_CB_DCC_DECOMPRESS_GFX11
378 : V_028808_CB_DCC_DECOMPRESS_GFX8,
379 },
380 &device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
381 if (result != VK_SUCCESS)
382 goto cleanup;
383
384 cleanup:
385 ralloc_free(fs_module);
386 return result;
387 }
388
389 void
radv_device_finish_meta_fast_clear_flush_state(struct radv_device * device)390 radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
391 {
392 struct radv_meta_state *state = &device->meta_state;
393
394 radv_DestroyPipeline(radv_device_to_handle(device),
395 state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
396 radv_DestroyPipeline(radv_device_to_handle(device),
397 state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
398 radv_DestroyPipeline(radv_device_to_handle(device),
399 state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
400 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
401 &state->alloc);
402
403 radv_DestroyPipeline(radv_device_to_handle(device),
404 state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
405 radv_DestroyPipelineLayout(radv_device_to_handle(device),
406 state->fast_clear_flush.dcc_decompress_compute_p_layout,
407 &state->alloc);
408 device->vk.dispatch_table.DestroyDescriptorSetLayout(
409 radv_device_to_handle(device), state->fast_clear_flush.dcc_decompress_compute_ds_layout,
410 &state->alloc);
411 }
412
413 static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device * device)414 radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
415 {
416 VkResult res = VK_SUCCESS;
417
418 mtx_lock(&device->meta_state.mtx);
419 if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
420 mtx_unlock(&device->meta_state.mtx);
421 return VK_SUCCESS;
422 }
423
424 nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
425 if (!vs_module) {
426 /* XXX: Need more accurate error */
427 res = VK_ERROR_OUT_OF_HOST_MEMORY;
428 goto cleanup;
429 }
430
431 res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
432 if (res != VK_SUCCESS)
433 goto cleanup;
434
435 VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
436 res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
437 if (res != VK_SUCCESS)
438 goto cleanup;
439
440 res = create_dcc_compress_compute(device);
441 if (res != VK_SUCCESS)
442 goto cleanup;
443
444 cleanup:
445 ralloc_free(vs_module);
446 mtx_unlock(&device->meta_state.mtx);
447
448 return res;
449 }
450
451 VkResult
radv_device_init_meta_fast_clear_flush_state(struct radv_device * device,bool on_demand)452 radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
453 {
454 if (on_demand)
455 return VK_SUCCESS;
456
457 return radv_device_init_meta_fast_clear_flush_state_internal(device);
458 }
459
460 static void
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,uint64_t pred_offset,bool value)461 radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
462 struct radv_image *image, uint64_t pred_offset,
463 bool value)
464 {
465 uint64_t va = 0;
466
467 if (value) {
468 va = radv_buffer_get_va(image->bindings[0].bo) + image->bindings[0].offset;
469 va += pred_offset;
470 }
471
472 si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
473 }
474
475 static void
radv_process_color_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,int level,int layer,bool flush_cb)476 radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
477 const VkImageSubresourceRange *range, int level, int layer,
478 bool flush_cb)
479 {
480 struct radv_device *device = cmd_buffer->device;
481 struct radv_image_view iview;
482 uint32_t width, height;
483
484 width = radv_minify(image->info.width, range->baseMipLevel + level);
485 height = radv_minify(image->info.height, range->baseMipLevel + level);
486
487 radv_image_view_init(&iview, device,
488 &(VkImageViewCreateInfo){
489 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
490 .image = radv_image_to_handle(image),
491 .viewType = radv_meta_get_view_type(image),
492 .format = image->vk.format,
493 .subresourceRange =
494 {
495 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
496 .baseMipLevel = range->baseMipLevel + level,
497 .levelCount = 1,
498 .baseArrayLayer = range->baseArrayLayer + layer,
499 .layerCount = 1,
500 },
501 },
502 0, NULL);
503
504 const VkRenderingAttachmentInfo color_att = {
505 .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
506 .imageView = radv_image_view_to_handle(&iview),
507 .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
508 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
509 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
510 };
511
512 const VkRenderingInfo rendering_info = {
513 .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
514 .renderArea = {
515 .offset = { 0, 0 },
516 .extent = { width, height }
517 },
518 .layerCount = 1,
519 .colorAttachmentCount = 1,
520 .pColorAttachments = &color_att,
521 };
522
523 radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
524
525 if (flush_cb)
526 cmd_buffer->state.flush_bits |=
527 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
528
529 radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
530
531 if (flush_cb)
532 cmd_buffer->state.flush_bits |=
533 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
534
535 radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
536
537 radv_image_view_finish(&iview);
538 }
539
540 static void
radv_process_color_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,enum radv_color_op op)541 radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
542 const VkImageSubresourceRange *subresourceRange, enum radv_color_op op)
543 {
544 struct radv_device *device = cmd_buffer->device;
545 struct radv_meta_saved_state saved_state;
546 bool old_predicating = false;
547 bool flush_cb = false;
548 uint64_t pred_offset;
549 VkPipeline *pipeline;
550
551 switch (op) {
552 case FAST_CLEAR_ELIMINATE:
553 pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
554 pred_offset = image->fce_pred_offset;
555 break;
556 case FMASK_DECOMPRESS:
557 pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
558 pred_offset = 0; /* FMASK_DECOMPRESS is never predicated. */
559
560 /* Flushing CB is required before and after FMASK_DECOMPRESS. */
561 flush_cb = true;
562 break;
563 case DCC_DECOMPRESS:
564 pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
565 pred_offset = image->dcc_pred_offset;
566
567 /* Flushing CB is required before and after DCC_DECOMPRESS. */
568 flush_cb = true;
569 break;
570 default:
571 unreachable("Invalid color op");
572 }
573
574 if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
575 (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
576 subresourceRange->baseArrayLayer != 0)) {
577 /* Only use predication if the image has DCC with mipmaps or
578 * if the range of layers covers the whole image because the
579 * predication is based on mip level.
580 */
581 pred_offset = 0;
582 }
583
584 if (!*pipeline) {
585 VkResult ret;
586
587 ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
588 if (ret != VK_SUCCESS) {
589 cmd_buffer->record_result = ret;
590 return;
591 }
592 }
593
594 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);
595
596 if (pred_offset) {
597 pred_offset += 8 * subresourceRange->baseMipLevel;
598
599 old_predicating = cmd_buffer->state.predicating;
600
601 radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
602 cmd_buffer->state.predicating = true;
603 }
604
605 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
606 *pipeline);
607
608 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
609 uint32_t width, height;
610
611 /* Do not decompress levels without DCC. */
612 if (op == DCC_DECOMPRESS && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
613 continue;
614
615 width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
616 height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
617
618 radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
619 &(VkViewport){.x = 0,
620 .y = 0,
621 .width = width,
622 .height = height,
623 .minDepth = 0.0f,
624 .maxDepth = 1.0f});
625
626 radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
627 &(VkRect2D){
628 .offset = {0, 0},
629 .extent = {width, height},
630 });
631
632 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
633 radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
634 }
635 }
636
637 cmd_buffer->state.flush_bits |=
638 RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
639
640 if (pred_offset) {
641 pred_offset += 8 * subresourceRange->baseMipLevel;
642
643 cmd_buffer->state.predicating = old_predicating;
644
645 radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);
646
647 if (cmd_buffer->state.predication_type != -1) {
648 /* Restore previous conditional rendering user state. */
649 si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
650 cmd_buffer->state.predication_op,
651 cmd_buffer->state.predication_va);
652 }
653 }
654
655 radv_meta_restore(&saved_state, cmd_buffer);
656
657 /* Clear the image's fast-clear eliminate predicate because FMASK_DECOMPRESS and DCC_DECOMPRESS
658 * also perform a fast-clear eliminate.
659 */
660 radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
661
662 /* Mark the image as being decompressed. */
663 if (op == DCC_DECOMPRESS)
664 radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
665 }
666
667 static void
radv_fast_clear_eliminate(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)668 radv_fast_clear_eliminate(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
669 const VkImageSubresourceRange *subresourceRange)
670 {
671 struct radv_barrier_data barrier = {0};
672
673 barrier.layout_transitions.fast_clear_eliminate = 1;
674 radv_describe_layout_transition(cmd_buffer, &barrier);
675
676 radv_process_color_image(cmd_buffer, image, subresourceRange, FAST_CLEAR_ELIMINATE);
677 }
678
679 static void
radv_fmask_decompress(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)680 radv_fmask_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
681 const VkImageSubresourceRange *subresourceRange)
682 {
683 struct radv_barrier_data barrier = {0};
684
685 barrier.layout_transitions.fmask_decompress = 1;
686 radv_describe_layout_transition(cmd_buffer, &barrier);
687
688 radv_process_color_image(cmd_buffer, image, subresourceRange, FMASK_DECOMPRESS);
689 }
690
691 void
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)692 radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
693 const VkImageSubresourceRange *subresourceRange)
694 {
695 if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
696 if (radv_image_has_dcc(image) && radv_image_has_cmask(image)) {
697 /* MSAA images with DCC and CMASK might have been fast-cleared and might require a FCE but
698 * FMASK_DECOMPRESS can't eliminate DCC fast clears.
699 */
700 radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
701 }
702
703 radv_fmask_decompress(cmd_buffer, image, subresourceRange);
704 } else {
705 /* Skip fast clear eliminate for images that support comp-to-single fast clears. */
706 if (image->support_comp_to_single)
707 return;
708
709 radv_fast_clear_eliminate(cmd_buffer, image, subresourceRange);
710 }
711 }
712
713 static void
radv_decompress_dcc_compute(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)714 radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
715 const VkImageSubresourceRange *subresourceRange)
716 {
717 struct radv_meta_saved_state saved_state;
718 struct radv_image_view load_iview = {0};
719 struct radv_image_view store_iview = {0};
720 struct radv_device *device = cmd_buffer->device;
721
722 cmd_buffer->state.flush_bits |=
723 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
724
725 if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
726 VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
727 if (ret != VK_SUCCESS) {
728 cmd_buffer->record_result = ret;
729 return;
730 }
731 }
732
733 radv_meta_save(&saved_state, cmd_buffer,
734 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
735
736 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
737 device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
738
739 for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
740 uint32_t width, height;
741
742 /* Do not decompress levels without DCC. */
743 if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
744 continue;
745
746 width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
747 height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
748
749 for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
750 radv_image_view_init(
751 &load_iview, cmd_buffer->device,
752 &(VkImageViewCreateInfo){
753 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
754 .image = radv_image_to_handle(image),
755 .viewType = VK_IMAGE_VIEW_TYPE_2D,
756 .format = image->vk.format,
757 .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
758 .baseMipLevel = subresourceRange->baseMipLevel + l,
759 .levelCount = 1,
760 .baseArrayLayer = subresourceRange->baseArrayLayer + s,
761 .layerCount = 1},
762 },
763 0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
764 radv_image_view_init(
765 &store_iview, cmd_buffer->device,
766 &(VkImageViewCreateInfo){
767 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
768 .image = radv_image_to_handle(image),
769 .viewType = VK_IMAGE_VIEW_TYPE_2D,
770 .format = image->vk.format,
771 .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
772 .baseMipLevel = subresourceRange->baseMipLevel + l,
773 .levelCount = 1,
774 .baseArrayLayer = subresourceRange->baseArrayLayer + s,
775 .layerCount = 1},
776 },
777 0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
778
779 radv_meta_push_descriptor_set(
780 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
781 device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
782 2, /* descriptorWriteCount */
783 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
784 .dstBinding = 0,
785 .dstArrayElement = 0,
786 .descriptorCount = 1,
787 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
788 .pImageInfo =
789 (VkDescriptorImageInfo[]){
790 {
791 .sampler = VK_NULL_HANDLE,
792 .imageView = radv_image_view_to_handle(&load_iview),
793 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
794 },
795 }},
796 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
797 .dstBinding = 1,
798 .dstArrayElement = 0,
799 .descriptorCount = 1,
800 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
801 .pImageInfo = (VkDescriptorImageInfo[]){
802 {
803 .sampler = VK_NULL_HANDLE,
804 .imageView = radv_image_view_to_handle(&store_iview),
805 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
806 },
807 }}});
808
809 radv_unaligned_dispatch(cmd_buffer, width, height, 1);
810
811 radv_image_view_finish(&load_iview);
812 radv_image_view_finish(&store_iview);
813 }
814 }
815
816 /* Mark this image as actually being decompressed. */
817 radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
818
819 radv_meta_restore(&saved_state, cmd_buffer);
820
821 cmd_buffer->state.flush_bits |=
822 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
823 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
824
825 /* Initialize the DCC metadata as "fully expanded". */
826 cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
827 }
828
829 void
radv_decompress_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)830 radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
831 const VkImageSubresourceRange *subresourceRange)
832 {
833 struct radv_barrier_data barrier = {0};
834
835 barrier.layout_transitions.dcc_decompress = 1;
836 radv_describe_layout_transition(cmd_buffer, &barrier);
837
838 if (cmd_buffer->qf == RADV_QUEUE_GENERAL)
839 radv_process_color_image(cmd_buffer, image, subresourceRange, DCC_DECOMPRESS);
840 else
841 radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
842 }
843