• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "sid.h"
30 
31 enum radv_depth_op {
32    DEPTH_DECOMPRESS,
33    DEPTH_RESUMMARIZE,
34 };
35 
36 static nir_shader *
build_expand_depth_stencil_compute_shader(struct radv_device * dev)37 build_expand_depth_stencil_compute_shader(struct radv_device *dev)
38 {
39    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
40 
41    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "expand_depth_stencil_compute");
42 
43    /* We need at least 8/8/1 to cover an entire HTILE block in a single workgroup. */
44    b.shader->info.workgroup_size[0] = 8;
45    b.shader->info.workgroup_size[1] = 8;
46    nir_variable *input_img = nir_variable_create(b.shader, nir_var_image, img_type, "in_img");
47    input_img->data.descriptor_set = 0;
48    input_img->data.binding = 0;
49 
50    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
51    output_img->data.descriptor_set = 0;
52    output_img->data.binding = 1;
53 
54    nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
55    nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
56    nir_ssa_def *block_size =
57       nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
58                     b.shader->info.workgroup_size[2], 0);
59 
60    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
61 
62    nir_ssa_def *data = nir_image_deref_load(
63       &b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id, nir_ssa_undef(&b, 1, 32),
64       nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
65 
66    /* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
67     * creating a vmcnt(0) because it expects the L1 cache to keep memory
68     * operations in-order for the same workgroup. The vmcnt(0) seems
69     * necessary however. */
70    nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
71                       .memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
72 
73    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
74                          nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0),
75                          .image_dim = GLSL_SAMPLER_DIM_2D);
76    return b.shader;
77 }
78 
79 static VkResult
create_expand_depth_stencil_compute(struct radv_device * device)80 create_expand_depth_stencil_compute(struct radv_device *device)
81 {
82    VkResult result = VK_SUCCESS;
83    nir_shader *cs = build_expand_depth_stencil_compute_shader(device);
84 
85    VkDescriptorSetLayoutCreateInfo ds_create_info = {
86       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
87       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
88       .bindingCount = 2,
89       .pBindings = (VkDescriptorSetLayoutBinding[]){
90          {.binding = 0,
91           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
92           .descriptorCount = 1,
93           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
94           .pImmutableSamplers = NULL},
95          {.binding = 1,
96           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
97           .descriptorCount = 1,
98           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
99           .pImmutableSamplers = NULL},
100       }};
101 
102    result = radv_CreateDescriptorSetLayout(
103       radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
104       &device->meta_state.expand_depth_stencil_compute_ds_layout);
105    if (result != VK_SUCCESS)
106       goto cleanup;
107 
108    VkPipelineLayoutCreateInfo pl_create_info = {
109       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
110       .setLayoutCount = 1,
111       .pSetLayouts = &device->meta_state.expand_depth_stencil_compute_ds_layout,
112       .pushConstantRangeCount = 0,
113       .pPushConstantRanges = NULL,
114    };
115 
116    result = radv_CreatePipelineLayout(
117       radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
118       &device->meta_state.expand_depth_stencil_compute_p_layout);
119    if (result != VK_SUCCESS)
120       goto cleanup;
121 
122    /* compute shader */
123 
124    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
125       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
126       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
127       .module = vk_shader_module_handle_from_nir(cs),
128       .pName = "main",
129       .pSpecializationInfo = NULL,
130    };
131 
132    VkComputePipelineCreateInfo vk_pipeline_info = {
133       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
134       .stage = pipeline_shader_stage,
135       .flags = 0,
136       .layout = device->meta_state.expand_depth_stencil_compute_p_layout,
137    };
138 
139    result = radv_CreateComputePipelines(
140       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
141       &vk_pipeline_info, NULL,
142       &device->meta_state.expand_depth_stencil_compute_pipeline);
143    if (result != VK_SUCCESS)
144       goto cleanup;
145 
146 cleanup:
147    ralloc_free(cs);
148    return result;
149 }
150 
151 static VkResult
create_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout)152 create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
153 {
154    VkPipelineLayoutCreateInfo pl_create_info = {
155       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
156       .setLayoutCount = 0,
157       .pSetLayouts = NULL,
158       .pushConstantRangeCount = 0,
159       .pPushConstantRanges = NULL,
160    };
161 
162    return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
163                                     &device->meta_state.alloc, layout);
164 }
165 
166 static VkResult
create_pipeline(struct radv_device * device,uint32_t samples,VkPipelineLayout layout,enum radv_depth_op op,VkPipeline * pipeline)167 create_pipeline(struct radv_device *device, uint32_t samples, VkPipelineLayout layout,
168                 enum radv_depth_op op, VkPipeline *pipeline)
169 {
170    VkResult result;
171    VkDevice device_h = radv_device_to_handle(device);
172 
173    mtx_lock(&device->meta_state.mtx);
174    if (*pipeline) {
175       mtx_unlock(&device->meta_state.mtx);
176       return VK_SUCCESS;
177    }
178 
179    nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices(device);
180    nir_shader *fs_module = radv_meta_build_nir_fs_noop(device);
181 
182    if (!vs_module || !fs_module) {
183       /* XXX: Need more accurate error */
184       result = VK_ERROR_OUT_OF_HOST_MEMORY;
185       goto cleanup;
186    }
187 
188    const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {
189       .sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,
190       .sampleLocationsEnable = false,
191    };
192 
193    const VkPipelineRenderingCreateInfo rendering_create_info = {
194       .sType = VK_STRUCTURE_TYPE_PIPELINE_RENDERING_CREATE_INFO,
195       .depthAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
196       .stencilAttachmentFormat = VK_FORMAT_D32_SFLOAT_S8_UINT,
197    };
198 
199    const VkGraphicsPipelineCreateInfo pipeline_create_info = {
200       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
201       .pNext = &rendering_create_info,
202       .stageCount = 2,
203       .pStages =
204          (VkPipelineShaderStageCreateInfo[]){
205             {
206                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
207                .stage = VK_SHADER_STAGE_VERTEX_BIT,
208                .module = vk_shader_module_handle_from_nir(vs_module),
209                .pName = "main",
210             },
211             {
212                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
213                .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
214                .module = vk_shader_module_handle_from_nir(fs_module),
215                .pName = "main",
216             },
217          },
218       .pVertexInputState =
219          &(VkPipelineVertexInputStateCreateInfo){
220             .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
221             .vertexBindingDescriptionCount = 0,
222             .vertexAttributeDescriptionCount = 0,
223          },
224       .pInputAssemblyState =
225          &(VkPipelineInputAssemblyStateCreateInfo){
226             .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
227             .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
228             .primitiveRestartEnable = false,
229          },
230       .pViewportState =
231          &(VkPipelineViewportStateCreateInfo){
232             .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
233             .viewportCount = 1,
234             .scissorCount = 1,
235          },
236       .pRasterizationState =
237          &(VkPipelineRasterizationStateCreateInfo){
238             .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
239             .depthClampEnable = false,
240             .rasterizerDiscardEnable = false,
241             .polygonMode = VK_POLYGON_MODE_FILL,
242             .cullMode = VK_CULL_MODE_NONE,
243             .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
244          },
245       .pMultisampleState =
246          &(VkPipelineMultisampleStateCreateInfo){
247             .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
248             .pNext = &sample_locs_create_info,
249             .rasterizationSamples = samples,
250             .sampleShadingEnable = false,
251             .pSampleMask = NULL,
252             .alphaToCoverageEnable = false,
253             .alphaToOneEnable = false,
254          },
255       .pColorBlendState =
256          &(VkPipelineColorBlendStateCreateInfo){
257             .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
258             .logicOpEnable = false,
259             .attachmentCount = 0,
260             .pAttachments = NULL,
261          },
262       .pDepthStencilState =
263          &(VkPipelineDepthStencilStateCreateInfo){
264             .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
265             .depthTestEnable = false,
266             .depthWriteEnable = false,
267             .depthBoundsTestEnable = false,
268             .stencilTestEnable = false,
269          },
270       .pDynamicState =
271          &(VkPipelineDynamicStateCreateInfo){
272             .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
273             .dynamicStateCount = 3,
274             .pDynamicStates =
275                (VkDynamicState[]){
276                   VK_DYNAMIC_STATE_VIEWPORT,
277                   VK_DYNAMIC_STATE_SCISSOR,
278                   VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,
279                },
280          },
281       .layout = layout,
282       .renderPass = VK_NULL_HANDLE,
283       .subpass = 0,
284    };
285 
286    struct radv_graphics_pipeline_create_info extra = {
287       .use_rectlist = true,
288       .depth_compress_disable = true,
289       .stencil_compress_disable = true,
290       .resummarize_enable = op == DEPTH_RESUMMARIZE,
291    };
292 
293    result = radv_graphics_pipeline_create(
294       device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,
295       &extra, &device->meta_state.alloc, pipeline);
296 
297 cleanup:
298    ralloc_free(fs_module);
299    ralloc_free(vs_module);
300    mtx_unlock(&device->meta_state.mtx);
301    return result;
302 }
303 
304 void
radv_device_finish_meta_depth_decomp_state(struct radv_device * device)305 radv_device_finish_meta_depth_decomp_state(struct radv_device *device)
306 {
307    struct radv_meta_state *state = &device->meta_state;
308 
309    for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
310       radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,
311                                  &state->alloc);
312 
313       radv_DestroyPipeline(radv_device_to_handle(device),
314                            state->depth_decomp[i].decompress_pipeline, &state->alloc);
315       radv_DestroyPipeline(radv_device_to_handle(device),
316                            state->depth_decomp[i].resummarize_pipeline, &state->alloc);
317    }
318 
319    radv_DestroyPipeline(radv_device_to_handle(device),
320                         state->expand_depth_stencil_compute_pipeline, &state->alloc);
321    radv_DestroyPipelineLayout(radv_device_to_handle(device),
322                               state->expand_depth_stencil_compute_p_layout, &state->alloc);
323    device->vk.dispatch_table.DestroyDescriptorSetLayout(
324       radv_device_to_handle(device), state->expand_depth_stencil_compute_ds_layout, &state->alloc);
325 }
326 
327 VkResult
radv_device_init_meta_depth_decomp_state(struct radv_device * device,bool on_demand)328 radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)
329 {
330    struct radv_meta_state *state = &device->meta_state;
331    VkResult res = VK_SUCCESS;
332 
333    for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {
334       uint32_t samples = 1 << i;
335 
336       res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);
337       if (res != VK_SUCCESS)
338          return res;
339 
340       if (on_demand)
341          continue;
342 
343       res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,
344                             &state->depth_decomp[i].decompress_pipeline);
345       if (res != VK_SUCCESS)
346          return res;
347 
348       res = create_pipeline(device, samples, state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,
349                             &state->depth_decomp[i].resummarize_pipeline);
350       if (res != VK_SUCCESS)
351          return res;
352    }
353 
354    return create_expand_depth_stencil_compute(device);
355 }
356 
357 static VkPipeline *
radv_get_depth_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,enum radv_depth_op op)358 radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
359                         const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)
360 {
361    struct radv_meta_state *state = &cmd_buffer->device->meta_state;
362    uint32_t samples = image->info.samples;
363    uint32_t samples_log2 = ffs(samples) - 1;
364    VkPipeline *pipeline;
365 
366    if (!state->depth_decomp[samples_log2].decompress_pipeline) {
367       VkResult ret;
368 
369       ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
370                             DEPTH_DECOMPRESS, &state->depth_decomp[samples_log2].decompress_pipeline);
371       if (ret != VK_SUCCESS) {
372          cmd_buffer->record_result = ret;
373          return NULL;
374       }
375 
376       ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].p_layout,
377                             DEPTH_RESUMMARIZE, &state->depth_decomp[samples_log2].resummarize_pipeline);
378       if (ret != VK_SUCCESS) {
379          cmd_buffer->record_result = ret;
380          return NULL;
381       }
382    }
383 
384    switch (op) {
385    case DEPTH_DECOMPRESS:
386       pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;
387       break;
388    case DEPTH_RESUMMARIZE:
389       pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;
390       break;
391    default:
392       unreachable("unknown operation");
393    }
394 
395    return pipeline;
396 }
397 
398 static void
radv_process_depth_image_layer(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * range,int level,int layer)399 radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
400                                const VkImageSubresourceRange *range, int level, int layer)
401 {
402    struct radv_device *device = cmd_buffer->device;
403    struct radv_image_view iview;
404    uint32_t width, height;
405 
406    width = radv_minify(image->info.width, range->baseMipLevel + level);
407    height = radv_minify(image->info.height, range->baseMipLevel + level);
408 
409    radv_image_view_init(&iview, device,
410                         &(VkImageViewCreateInfo){
411                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
412                            .image = radv_image_to_handle(image),
413                            .viewType = radv_meta_get_view_type(image),
414                            .format = image->vk.format,
415                            .subresourceRange =
416                               {
417                                  .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
418                                  .baseMipLevel = range->baseMipLevel + level,
419                                  .levelCount = 1,
420                                  .baseArrayLayer = range->baseArrayLayer + layer,
421                                  .layerCount = 1,
422                               },
423                         },
424                         0, NULL);
425 
426    const VkRenderingAttachmentInfo depth_att = {
427       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
428       .imageView = radv_image_view_to_handle(&iview),
429       .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
430       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
431       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
432    };
433 
434    const VkRenderingAttachmentInfo stencil_att = {
435       .sType = VK_STRUCTURE_TYPE_RENDERING_ATTACHMENT_INFO,
436       .imageView = radv_image_view_to_handle(&iview),
437       .imageLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
438       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
439       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
440    };
441 
442    const VkRenderingInfo rendering_info = {
443       .sType = VK_STRUCTURE_TYPE_RENDERING_INFO,
444       .renderArea = {
445          .offset = { 0, 0 },
446          .extent = { width, height }
447       },
448       .layerCount = 1,
449       .pDepthAttachment = &depth_att,
450       .pStencilAttachment = &stencil_att,
451    };
452 
453    radv_CmdBeginRendering(radv_cmd_buffer_to_handle(cmd_buffer), &rendering_info);
454 
455    radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);
456 
457    radv_CmdEndRendering(radv_cmd_buffer_to_handle(cmd_buffer));
458 
459    radv_image_view_finish(&iview);
460 }
461 
462 static void
radv_process_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs,enum radv_depth_op op)463 radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
464                            const VkImageSubresourceRange *subresourceRange,
465                            struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)
466 {
467    struct radv_meta_saved_state saved_state;
468    VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);
469    VkPipeline *pipeline;
470 
471    radv_meta_save(
472       &saved_state, cmd_buffer,
473       RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);
474 
475    pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);
476 
477    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
478                         *pipeline);
479 
480    if (sample_locs) {
481       assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
482 
483       /* Set the sample locations specified during explicit or
484        * automatic layout transitions, otherwise the depth decompress
485        * pass uses the default HW locations.
486        */
487       radv_CmdSetSampleLocationsEXT(cmd_buffer_h,
488                                     &(VkSampleLocationsInfoEXT){
489                                        .sampleLocationsPerPixel = sample_locs->per_pixel,
490                                        .sampleLocationGridSize = sample_locs->grid_size,
491                                        .sampleLocationsCount = sample_locs->count,
492                                        .pSampleLocations = sample_locs->locations,
493                                     });
494    }
495 
496    for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
497 
498       /* Do not decompress levels without HTILE. */
499       if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
500          continue;
501 
502       uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
503       uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
504 
505       radv_CmdSetViewport(cmd_buffer_h, 0, 1,
506                           &(VkViewport){.x = 0,
507                                         .y = 0,
508                                         .width = width,
509                                         .height = height,
510                                         .minDepth = 0.0f,
511                                         .maxDepth = 1.0f});
512 
513       radv_CmdSetScissor(cmd_buffer_h, 0, 1,
514                          &(VkRect2D){
515                             .offset = {0, 0},
516                             .extent = {width, height},
517                          });
518 
519       for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
520          radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);
521       }
522    }
523 
524    radv_meta_restore(&saved_state, cmd_buffer);
525 }
526 
527 static void
radv_expand_depth_stencil_compute(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange)528 radv_expand_depth_stencil_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
529                                   const VkImageSubresourceRange *subresourceRange)
530 {
531    struct radv_meta_saved_state saved_state;
532    struct radv_image_view load_iview = {0};
533    struct radv_image_view store_iview = {0};
534    struct radv_device *device = cmd_buffer->device;
535 
536    assert(radv_image_is_tc_compat_htile(image));
537 
538    cmd_buffer->state.flush_bits |=
539       radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
540 
541    radv_meta_save(&saved_state, cmd_buffer,
542                   RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);
543 
544    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
545                         device->meta_state.expand_depth_stencil_compute_pipeline);
546 
547    for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
548       uint32_t width, height;
549 
550       /* Do not decompress levels without HTILE. */
551       if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))
552          continue;
553 
554       width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
555       height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);
556 
557       for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
558          radv_image_view_init(
559             &load_iview, cmd_buffer->device,
560             &(VkImageViewCreateInfo){
561                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
562                .image = radv_image_to_handle(image),
563                .viewType = VK_IMAGE_VIEW_TYPE_2D,
564                .format = image->vk.format,
565                .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
566                                     .baseMipLevel = subresourceRange->baseMipLevel + l,
567                                     .levelCount = 1,
568                                     .baseArrayLayer = subresourceRange->baseArrayLayer + s,
569                                     .layerCount = 1},
570             },
571             0, &(struct radv_image_view_extra_create_info){.enable_compression = true});
572          radv_image_view_init(
573             &store_iview, cmd_buffer->device,
574             &(VkImageViewCreateInfo){
575                .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
576                .image = radv_image_to_handle(image),
577                .viewType = VK_IMAGE_VIEW_TYPE_2D,
578                .format = image->vk.format,
579                .subresourceRange = {.aspectMask = subresourceRange->aspectMask,
580                                     .baseMipLevel = subresourceRange->baseMipLevel + l,
581                                     .levelCount = 1,
582                                     .baseArrayLayer = subresourceRange->baseArrayLayer + s,
583                                     .layerCount = 1},
584             },
585             0, &(struct radv_image_view_extra_create_info){.disable_compression = true});
586 
587          radv_meta_push_descriptor_set(
588             cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
589             device->meta_state.expand_depth_stencil_compute_p_layout, 0, /* set */
590             2, /* descriptorWriteCount */
591             (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
592                                       .dstBinding = 0,
593                                       .dstArrayElement = 0,
594                                       .descriptorCount = 1,
595                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
596                                       .pImageInfo =
597                                          (VkDescriptorImageInfo[]){
598                                             {
599                                                .sampler = VK_NULL_HANDLE,
600                                                .imageView = radv_image_view_to_handle(&load_iview),
601                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
602                                             },
603                                          }},
604                                      {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
605                                       .dstBinding = 1,
606                                       .dstArrayElement = 0,
607                                       .descriptorCount = 1,
608                                       .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
609                                       .pImageInfo = (VkDescriptorImageInfo[]){
610                                          {
611                                             .sampler = VK_NULL_HANDLE,
612                                             .imageView = radv_image_view_to_handle(&store_iview),
613                                             .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
614                                          },
615                                       }}});
616 
617          radv_unaligned_dispatch(cmd_buffer, width, height, 1);
618 
619          radv_image_view_finish(&load_iview);
620          radv_image_view_finish(&store_iview);
621       }
622    }
623 
624    radv_meta_restore(&saved_state, cmd_buffer);
625 
626    cmd_buffer->state.flush_bits |=
627       RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
628       radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
629 
630    /* Initialize the HTILE metadata as "fully expanded". */
631    uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
632 
633    cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, image, subresourceRange, htile_value);
634 }
635 
636 void
radv_expand_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)637 radv_expand_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
638                           const VkImageSubresourceRange *subresourceRange,
639                           struct radv_sample_locations_state *sample_locs)
640 {
641    struct radv_barrier_data barrier = {0};
642 
643    barrier.layout_transitions.depth_stencil_expand = 1;
644    radv_describe_layout_transition(cmd_buffer, &barrier);
645 
646    if (cmd_buffer->qf == RADV_QUEUE_GENERAL) {
647       radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_DECOMPRESS);
648    } else {
649       radv_expand_depth_stencil_compute(cmd_buffer, image, subresourceRange);
650    }
651 }
652 
653 void
radv_resummarize_depth_stencil(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image,const VkImageSubresourceRange * subresourceRange,struct radv_sample_locations_state * sample_locs)654 radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
655                                const VkImageSubresourceRange *subresourceRange,
656                                struct radv_sample_locations_state *sample_locs)
657 {
658    struct radv_barrier_data barrier = {0};
659 
660    barrier.layout_transitions.depth_stencil_resummarize = 1;
661    radv_describe_layout_transition(cmd_buffer, &barrier);
662 
663    assert(cmd_buffer->qf == RADV_QUEUE_GENERAL);
664    radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);
665 }
666