1 /*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30
/* Which HTILE-related meta operation a pipeline performs. */
enum radv_depth_op {
   DEPTH_DECOMPRESS,  /* expand compressed HTILE so depth/stencil is directly readable */
   DEPTH_RESUMMARIZE, /* rebuild the HTILE summary from the depth/stencil contents */
};
35
/* Build the compute shader used for the depth/stencil expand on compute
 * queues: each invocation loads one texel from the (compressed) input image
 * at binding 0 and stores it back to the (uncompressed) output image at
 * binding 1, with a device-scope barrier in between.
 */
static nir_shader *
build_expand_depth_stencil_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");

   /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup.
    * (the z dimension is left at its default value) */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   /* Binding 0: source image, read with compression enabled. */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   /* Binding 1: destination image, written with compression disabled. */
   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   /* global_id = workgroup_id * workgroup_size + local_invocation_id,
    * i.e. one invocation per texel. */
   nir_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_def *wg_id = nir_load_workgroup_id(&b);
   nir_def *block_size = nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                                       b.shader->info.workgroup_size[2], 0);

   nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_def *data = nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->def, global_id,
                                        nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);

   /* We need a SCOPE_DEVICE memory_scope because ACO will avoid
    * creating a vmcnt(0) because it expects the L1 cache to keep memory
    * operations in-order for the same workgroup. The vmcnt(0) seems
    * necessary however. */
   nir_barrier(&b, .execution_scope = SCOPE_WORKGROUP, .memory_scope = SCOPE_DEVICE,
               .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_undef(&b, 1, 32), data,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}
75
/* Create the descriptor set layout, pipeline layout and compute pipeline for
 * the compute-based depth/stencil expand. The created objects are stored in
 * device->meta_state; the NIR shader is freed on all paths.
 */
static VkResult
create_expand_depth_stencil_compute(struct radv_device *device)
{
   VkResult result = VK_SUCCESS;
   nir_shader *cs = build_expand_depth_stencil_compute_shader(device);

   /* Two storage images (input at binding 0, output at binding 1), bound as
    * push descriptors. */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
                                                     .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
                                                     .bindingCount = 2,
                                                     .pBindings = (VkDescriptorSetLayoutBinding[]){
                                                        {.binding = 0,
                                                         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                         .descriptorCount = 1,
                                                         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                                                         .pImmutableSamplers = NULL},
                                                        {.binding = 1,
                                                         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                         .descriptorCount = 1,
                                                         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
                                                         .pImmutableSamplers = NULL},
                                                     }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.expand_depth_stencil_compute_ds_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.expand_depth_stencil_compute_p_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.expand_depth_stencil_compute_p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device), device->meta_state.cache, 1, &vk_pipeline_info,
                                        NULL, &device->meta_state.expand_depth_stencil_compute_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

cleanup:
   /* Always free the NIR shader; the pipeline keeps its own copy. */
   ralloc_free(cs);
   return result;
}
142
143 static VkResult
create_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout)144 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
145 {
146 VkPipelineLayoutCreateInfo pl_create_info = {
147 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
148 .setLayoutCount = 0,
149 .pSetLayouts = NULL,
150 .pushConstantRangeCount = 0,
151 .pPushConstantRanges = NULL,
152 };
153
154 return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc, layout);
155 }
156
/* Create the graphics pipeline that performs a depth decompress or
 * resummarize pass for the given sample count. The pipeline uses a
 * vertex-generating VS and a no-op FS; the actual HTILE operation is selected
 * through the radv_graphics_pipeline_create_info "extra" flags. Takes the
 * meta-state mutex so on-demand creation is safe against concurrent callers;
 * returns VK_SUCCESS immediately if *pipeline was already created.
 */
static VkResult
create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout, enum radv_depth_op op,
                VkPipeline *pipeline)
{
   VkResult result;
   VkDevice device_h = radv_device_to_handle(device);

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      /* Another thread created it while we were waiting for the lock. */
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
   nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);

   if (!vs_module || !fs_module) {
      /* XXX: Need more accurate error */
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto cleanup;
   }

   /* Sample locations themselves are set dynamically (see the dynamic state
    * list below); here we only declare that the pipeline supports them. */
   const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
      .sampleLocationsEnable = false,
   };

   /* Dynamic rendering: a combined depth/stencil attachment, no color. */
   const VkPipelineRenderingCreateInfo rendering_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
      .depthAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
      .stencilAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
   };

   const VkGraphicsPipelineCreateInfo pipeline_create_info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
      .pNext = &rendering_create_info,
      .stageCount = 2,
      .pStages =
         (VkPipelineShaderStageCreateInfo[]){
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_VERTEX_BIT,
               .module = vk_shader_module_handle_from_nir(vs_module),
               .pName = "main",
            },
            {
               .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
               .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
               .module = vk_shader_module_handle_from_nir(fs_module),
               .pName = "main",
            },
         },
      /* The VS generates its own vertices, so no vertex input. */
      .pVertexInputState =
         &(VkPipelineVertexInputStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
            .vertexBindingDescriptionCount = 0,
            .vertexAttributeDescriptionCount = 0,
         },
      .pInputAssemblyState =
         &(VkPipelineInputAssemblyStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
            .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
            .primitiveRestartEnable = false,
         },
      .pViewportState =
         &(VkPipelineViewportStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
            .viewportCount = 1,
            .scissorCount = 1,
         },
      .pRasterizationState =
         &(VkPipelineRasterizationStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
            .depthClampEnable = false,
            .rasterizerDiscardEnable = false,
            .polygonMode = VK_POLYGON_MODE_FILL,
            .cullMode = VK_CULL_MODE_NONE,
            .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         },
      .pMultisampleState =
         &(VkPipelineMultisampleStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
            .pNext = &sample_locs_create_info,
            .rasterizationSamples = samples,
            .sampleShadingEnable = false,
            .pSampleMask = NULL,
            .alphaToCoverageEnable = false,
            .alphaToOneEnable = false,
         },
      /* No color attachments, hence no blend attachments. */
      .pColorBlendState =
         &(VkPipelineColorBlendStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
            .logicOpEnable = false,
            .attachmentCount = 0,
            .pAttachments = NULL,
         },
      /* All depth/stencil tests disabled: the pass only triggers the HTILE
       * operation selected by the "extra" flags below. */
      .pDepthStencilState =
         &(VkPipelineDepthStencilStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
            .depthTestEnable = false,
            .depthWriteEnable = false,
            .depthBoundsTestEnable = false,
            .stencilTestEnable = false,
         },
      .pDynamicState =
         &(VkPipelineDynamicStateCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
            .dynamicStateCount = 3,
            .pDynamicStates =
               (VkDynamicState[]){
                  VK_DYNAMIC_STATE_VIEWPORT,
                  VK_DYNAMIC_STATE_SCISSOR,
                  VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
               },
         },
      .layout = layout,
      .renderPass = VK_NULL_HANDLE,
      .subpass = 0,
   };

   /* RADV-specific overrides: draw as a rectlist and select the HTILE
    * decompress vs. resummarize behavior. */
   struct radv_graphics_pipeline_create_info extra = {
      .use_rectlist = true,
      .depth_compress_disable = true,
      .stencil_compress_disable = true,
      .resummarize_enable = op == DEPTH_RESUMMARIZE,
   };

   result = radv_graphics_pipeline_create(device_h, device->meta_state.cache, &pipeline_create_info, &extra,
                                          &device->meta_state.alloc, pipeline);

cleanup:
   ralloc_free(fs_module);
   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}
293
294 void
radv_device_finish_meta_depth_decomp_state(struct radv_device * device)295 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
296 {
297 struct radv_meta_state *state = &device->meta_state;
298
299 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
300 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout, &state->alloc);
301
302 radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].decompress_pipeline, &state->alloc);
303 radv_DestroyPipeline(radv_device_to_handle(device), state->depth_decomp[i].resummarize_pipeline, &state->alloc);
304 }
305
306 radv_DestroyPipeline(radv_device_to_handle(device), state->expand_depth_stencil_compute_pipeline, &state->alloc);
307 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->expand_depth_stencil_compute_p_layout,
308 &state->alloc);
309 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
310 state->expand_depth_stencil_compute_ds_layout, &state->alloc);
311 }
312
313 VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device * device,bool on_demand)314 radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
315 {
316 struct radv_meta_state *state = &device->meta_state;
317 VkResult res = VK_SUCCESS;
318
319 for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
320 uint32_t samples = 1 << i;
321
322 res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
323 if (res != VK_SUCCESS)
324 return res;
325
326 if (on_demand)
327 continue;
328
329 res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,
330 &state->depth_decomp[i].decompress_pipeline);
331 if (res != VK_SUCCESS)
332 return res;
333
334 res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,
335 &state->depth_decomp[i].resummarize_pipeline);
336 if (res != VK_SUCCESS)
337 return res;
338 }
339
340 return create_expand_depth_stencil_compute(device);
341 }
342
343 static VkPipeline *
radv_get_depth_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,enum radv_depth_op op)344 radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
345 const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
346 {
347 struct radv_meta_state *state = &cmd_buffer->device->meta_state;
348 uint32_t samples = image->vk.samples;
349 uint32_t samples_log2 = ffs(samples) - 1;
350 VkPipeline *pipeline;
351
352 if (!state->depth_decomp[samples_log2].decompress_pipeline) {
353 VkResult ret;
354
355 ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS,
356 &state->depth_decomp[samples_log2].decompress_pipeline);
357 if (ret != VK_SUCCESS) {
358 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
359 return NULL;
360 }
361
362 ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,
363 &state->depth_decomp[samples_log2].resummarize_pipeline);
364 if (ret != VK_SUCCESS) {
365 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
366 return NULL;
367 }
368 }
369
370 switch (op) {
371 case DEPTH_DECOMPRESS:
372 pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
373 break;
374 case DEPTH_RESUMMARIZE:
375 pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
376 break;
377 default:
378 unreachable("unknown operation");
379 }
380
381 return pipeline;
382 }
383
/* Run one decompress/resummarize draw over a single mip level and array layer
 * of the image: build a temporary depth-aspect view, begin dynamic rendering
 * with it bound as both depth and stencil attachment, and draw the full-level
 * rectangle (the currently bound pipeline does the actual work).
 * 'level' and 'layer' are relative to range->baseMipLevel/baseArrayLayer.
 */
static void
radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                               const VkImageSubresourceRange *range, int level, int layer)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view iview;
   uint32_t width, height;

   width = radv_minify(image->vk.extent.width, range->baseMipLevel + level);
   height = radv_minify(image->vk.extent.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = image->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
                                 .baseMipLevel = range->baseMipLevel + level,
                                 .levelCount = 1,
                                 .baseArrayLayer = range->baseArrayLayer + layer,
                                 .layerCount = 1,
                              },
                        },
                        0, NULL);

   /* LOAD/STORE on both attachments: the pass must preserve the existing
    * depth/stencil contents while the HTILE operation runs. */
   const VkRenderingAttachmentInfo depth_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = radv_image_view_to_handle(&iview),
      .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
   };

   const VkRenderingAttachmentInfo stencil_att = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
      .imageView = radv_image_view_to_handle(&iview),
      .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
   };

   const VkRenderingInfo rendering_info = {
      .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
      .renderArea = {.offset = {0, 0}, .extent = {width, height}},
      .layerCount = 1,
      .pDepthAttachment = &depth_att,
      .pStencilAttachment = &stencil_att,
   };

   radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);

   /* 3 vertices: the bound pipeline uses use_rectlist (see create_pipeline). */
   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);

   radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));

   radv_image_view_finish(&iview);
}
444
445 static void
radv_process_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs,enum radv_depth_op op)446 radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
447 const VkImageSubresourceRange *subresourceRange,
448 struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
449 {
450 struct radv_meta_saved_state saved_state;
451 VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
452 VkPipeline *pipeline;
453
454 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_RENDER);
455
456 pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
457
458 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
459
460 if (sample_locs) {
461 assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
462
463 /* Set the sample locations specified during explicit or
464 * automatic layout transitions, otherwise the depth decompress
465 * pass uses the default HW locations.
466 */
467 radv_CmdSetSampleLocationsEXT(cmd_buffer_h, &(VkSampleLocationsInfoEXT){
468 .sampleLocationsPerPixel = sample_locs->per_pixel,
469 .sampleLocationGridSize = sample_locs->grid_size,
470 .sampleLocationsCount = sample_locs->count,
471 .pSampleLocations = sample_locs->locations,
472 });
473 }
474
475 for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); ++l) {
476
477 /* Do not decompress levels without HTILE. */
478 if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
479 continue;
480
481 uint32_t width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
482 uint32_t height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);
483
484 radv_CmdSetViewport(
485 cmd_buffer_h, 0, 1,
486 &(VkViewport){.x = 0, .y = 0, .width = width, .height = height, .minDepth = 0.0f, .maxDepth = 1.0f});
487
488 radv_CmdSetScissor(cmd_buffer_h, 0, 1,
489 &(VkRect2D){
490 .offset = {0, 0},
491 .extent = {width, height},
492 });
493
494 for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
495 radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
496 }
497 }
498
499 radv_meta_restore(&saved_state, cmd_buffer);
500 }
501
/* Compute-queue path for the depth/stencil expand: for every HTILE-enabled
 * level/layer in the range, copy the image to itself through a pair of views
 * (load view with compression enabled, store view with compression disabled),
 * then re-initialize the HTILE metadata to the "fully expanded" value.
 * Only valid for TC-compatible-HTILE images.
 */
static void
radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                                  const VkImageSubresourceRange *subresourceRange)
{
   struct radv_meta_saved_state saved_state;
   struct radv_image_view load_iview = {0};
   struct radv_image_view store_iview = {0};
   struct radv_device *device = cmd_buffer->device;

   assert(radv_image_is_tc_compat_htile(image));

   /* Make prior writes to the image visible to the shader before dispatch. */
   cmd_buffer->state.flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.expand_depth_stencil_compute_pipeline);

   for (uint32_t l = 0; l < vk_image_subresource_level_count(&image->vk, subresourceRange); l++) {
      uint32_t width, height;

      /* Do not decompress levels without HTILE. */
      if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->vk.extent.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->vk.extent.height, subresourceRange->baseMipLevel + l);

      for (uint32_t s = 0; s < vk_image_subresource_layer_count(&image->vk, subresourceRange); s++) {
         /* Source view: read through HTILE compression. */
         radv_image_view_init(&load_iview, cmd_buffer->device,
                              &(VkImageViewCreateInfo){
                                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                                 .image = radv_image_to_handle(image),
                                 .viewType = VK_IMAGE_VIEW_TYPE_2D,
                                 .format = image->vk.format,
                                 .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
                                                      .baseMipLevel = subresourceRange->baseMipLevel + l,
                                                      .levelCount = 1,
                                                      .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                                      .layerCount = 1},
                              },
                              0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
         /* Destination view: write the expanded (uncompressed) data back. */
         radv_image_view_init(&store_iview, cmd_buffer->device,
                              &(VkImageViewCreateInfo){
                                 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                                 .image = radv_image_to_handle(image),
                                 .viewType = VK_IMAGE_VIEW_TYPE_2D,
                                 .format = image->vk.format,
                                 .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
                                                      .baseMipLevel = subresourceRange->baseMipLevel + l,
                                                      .levelCount = 1,
                                                      .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                                      .layerCount = 1},
                              },
                              0, &(struct radv_image_view_extra_create_info){.disable_compression = true});

         /* Bindings match the compute shader: 0 = load view, 1 = store view. */
         radv_meta_push_descriptor_set(
            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.expand_depth_stencil_compute_p_layout,
            0, /* set */
            2, /* descriptorWriteCount */
            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 0,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo =
                                         (VkDescriptorImageInfo[]){
                                            {
                                               .sampler = VK_NULL_HANDLE,
                                               .imageView = radv_image_view_to_handle(&load_iview),
                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                            },
                                         }},
                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 1,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo = (VkDescriptorImageInfo[]){
                                         {
                                            .sampler = VK_NULL_HANDLE,
                                            .imageView = radv_image_view_to_handle(&store_iview),
                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                         },
                                      }}});

         radv_unaligned_dispatch(cmd_buffer, width, height, 1);

         radv_image_view_finish(&load_iview);
         radv_image_view_finish(&store_iview);
      }
   }

   radv_meta_restore(&saved_state, cmd_buffer);

   /* Make the shader writes visible before the HTILE clear below. */
   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
                                   radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);

   /* Initialize the HTILE metadata as "fully expanded". */
   uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);

   cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
}
605
606 void
radv_expand_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)607 radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
608 const VkImageSubresourceRange *subresourceRange,
609 struct radv_sample_locations_state *sample_locs)
610 {
611 struct radv_barrier_data barrier = {0};
612
613 barrier.layout_transitions.depth_stencil_expand = 1;
614 radv_describe_layout_transition(cmd_buffer, &barrier);
615
616 if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
617 radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
618 } else {
619 radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
620 }
621 }
622
623 void
radv_resummarize_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)624 radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
625 const VkImageSubresourceRange *subresourceRange,
626 struct radv_sample_locations_state *sample_locs)
627 {
628 struct radv_barrier_data barrier = {0};
629
630 barrier.layout_transitions.depth_stencil_resummarize = 1;
631 radv_describe_layout_transition(cmd_buffer, &barrier);
632
633 assert(cmd_buffer->qf == RADV_QUEUE_GENERAL);
634 radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
635 }
636