• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Dave Airlie
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <assert.h>
25 #include <stdbool.h>
26 
27 #include "nir/nir_builder.h"
28 #include "nir/nir_format_convert.h"
29 
30 #include "radv_meta.h"
31 #include "radv_private.h"
32 #include "sid.h"
33 #include "vk_common_entrypoints.h"
34 #include "vk_format.h"
35 
36 static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder * b,nir_def * input)37 radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
38 {
39    unsigned i;
40    nir_def *comp[4];
41    for (i = 0; i < 3; i++)
42       comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
43    comp[3] = nir_channels(b, input, 1 << 3);
44    return nir_vec(b, comp, 4);
45 }
46 
47 static nir_shader *
build_resolve_compute_shader(struct radv_device * dev,bool is_integer,bool is_srgb,int samples)48 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
49 {
50    enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
51    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
52    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
53    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
54                                          is_integer ? "int" : (is_srgb ? "srgb" : "float"));
55    b.shader->info.workgroup_size[0] = 8;
56    b.shader->info.workgroup_size[1] = 8;
57 
58    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
59    input_img->data.descriptor_set = 0;
60    input_img->data.binding = 0;
61 
62    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
63    output_img->data.descriptor_set = 0;
64    output_img->data.binding = 1;
65 
66    nir_def *global_id = get_global_ids(&b, 2);
67 
68    nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
69    nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);
70 
71    nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
72    nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
73 
74    nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
75 
76    radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);
77 
78    nir_def *outval = nir_load_var(&b, color);
79    if (is_srgb)
80       outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
81 
82    nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
83                                  nir_undef(&b, 1, 32));
84 
85    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
86                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
87    return b.shader;
88 }
89 
90 enum {
91    DEPTH_RESOLVE,
92    STENCIL_RESOLVE,
93 };
94 
95 static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)96 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
97 {
98    switch (resolve_mode) {
99    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
100       return "zero";
101    case VK_RESOLVE_MODE_AVERAGE_BIT:
102       return "average";
103    case VK_RESOLVE_MODE_MIN_BIT:
104       return "min";
105    case VK_RESOLVE_MODE_MAX_BIT:
106       return "max";
107    default:
108       unreachable("invalid resolve mode");
109    }
110 }
111 
112 static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device * dev,int samples,int index,VkResolveModeFlagBits resolve_mode)113 build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
114                                            VkResolveModeFlagBits resolve_mode)
115 {
116    enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
117    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
118    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
119 
120    nir_builder b =
121       radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
122                             index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
123    b.shader->info.workgroup_size[0] = 8;
124    b.shader->info.workgroup_size[1] = 8;
125 
126    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
127    input_img->data.descriptor_set = 0;
128    input_img->data.binding = 0;
129 
130    nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
131    output_img->data.descriptor_set = 0;
132    output_img->data.binding = 1;
133 
134    nir_def *global_id = get_global_ids(&b, 3);
135 
136    nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
137 
138    nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);
139 
140    nir_def *img_coord =
141       nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));
142 
143    nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
144    nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));
145 
146    if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
147       for (int i = 1; i < samples; i++) {
148          nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));
149 
150          switch (resolve_mode) {
151          case VK_RESOLVE_MODE_AVERAGE_BIT:
152             assert(index == DEPTH_RESOLVE);
153             outval = nir_fadd(&b, outval, si);
154             break;
155          case VK_RESOLVE_MODE_MIN_BIT:
156             if (index == DEPTH_RESOLVE)
157                outval = nir_fmin(&b, outval, si);
158             else
159                outval = nir_umin(&b, outval, si);
160             break;
161          case VK_RESOLVE_MODE_MAX_BIT:
162             if (index == DEPTH_RESOLVE)
163                outval = nir_fmax(&b, outval, si);
164             else
165                outval = nir_umax(&b, outval, si);
166             break;
167          default:
168             unreachable("invalid resolve mode");
169          }
170       }
171 
172       if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
173          outval = nir_fdiv_imm(&b, outval, samples);
174    }
175 
176    nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
177                              nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
178    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
179                          nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
180    return b.shader;
181 }
182 
183 static VkResult
create_layout(struct radv_device * device)184 create_layout(struct radv_device *device)
185 {
186    VkResult result;
187    /*
188     * two descriptors one for the image being sampled
189     * one for the buffer being written.
190     */
191    VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
192                                                      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
193                                                      .bindingCount = 2,
194                                                      .pBindings = (VkDescriptorSetLayoutBinding[]){
195                                                         {.binding = 0,
196                                                          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
197                                                          .descriptorCount = 1,
198                                                          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
199                                                          .pImmutableSamplers = NULL},
200                                                         {.binding = 1,
201                                                          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
202                                                          .descriptorCount = 1,
203                                                          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
204                                                          .pImmutableSamplers = NULL},
205                                                      }};
206 
207    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
208                                            &device->meta_state.resolve_compute.ds_layout);
209    if (result != VK_SUCCESS)
210       goto fail;
211 
212    VkPipelineLayoutCreateInfo pl_create_info = {
213       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
214       .setLayoutCount = 1,
215       .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
216       .pushConstantRangeCount = 1,
217       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
218    };
219 
220    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
221                                       &device->meta_state.resolve_compute.p_layout);
222    if (result != VK_SUCCESS)
223       goto fail;
224    return VK_SUCCESS;
225 fail:
226    return result;
227 }
228 
229 static VkResult
create_resolve_pipeline(struct radv_device * device,int samples,bool is_integer,bool is_srgb,VkPipeline * pipeline)230 create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, VkPipeline *pipeline)
231 {
232    VkResult result;
233 
234    mtx_lock(&device->meta_state.mtx);
235    if (*pipeline) {
236       mtx_unlock(&device->meta_state.mtx);
237       return VK_SUCCESS;
238    }
239 
240    nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
241 
242    /* compute shader */
243 
244    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
245       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
246       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
247       .module = vk_shader_module_handle_from_nir(cs),
248       .pName = "main",
249       .pSpecializationInfo = NULL,
250    };
251 
252    VkComputePipelineCreateInfo vk_pipeline_info = {
253       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
254       .stage = pipeline_shader_stage,
255       .flags = 0,
256       .layout = device->meta_state.resolve_compute.p_layout,
257    };
258 
259    result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
260                                          NULL, pipeline);
261    if (result != VK_SUCCESS)
262       goto fail;
263 
264    ralloc_free(cs);
265    mtx_unlock(&device->meta_state.mtx);
266    return VK_SUCCESS;
267 fail:
268    ralloc_free(cs);
269    mtx_unlock(&device->meta_state.mtx);
270    return result;
271 }
272 
273 static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device * device,int samples,int index,VkResolveModeFlagBits resolve_mode,VkPipeline * pipeline)274 create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
275                                       VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
276 {
277    VkResult result;
278 
279    mtx_lock(&device->meta_state.mtx);
280    if (*pipeline) {
281       mtx_unlock(&device->meta_state.mtx);
282       return VK_SUCCESS;
283    }
284 
285    nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
286 
287    /* compute shader */
288    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
289       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
290       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
291       .module = vk_shader_module_handle_from_nir(cs),
292       .pName = "main",
293       .pSpecializationInfo = NULL,
294    };
295 
296    VkComputePipelineCreateInfo vk_pipeline_info = {
297       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
298       .stage = pipeline_shader_stage,
299       .flags = 0,
300       .layout = device->meta_state.resolve_compute.p_layout,
301    };
302 
303    result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
304                                          NULL, pipeline);
305    if (result != VK_SUCCESS)
306       goto fail;
307 
308    ralloc_free(cs);
309    mtx_unlock(&device->meta_state.mtx);
310    return VK_SUCCESS;
311 fail:
312    ralloc_free(cs);
313    mtx_unlock(&device->meta_state.mtx);
314    return result;
315 }
316 
317 VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device * device,bool on_demand)318 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
319 {
320    struct radv_meta_state *state = &device->meta_state;
321    VkResult res;
322 
323    res = create_layout(device);
324    if (res != VK_SUCCESS)
325       return res;
326 
327    if (on_demand)
328       return VK_SUCCESS;
329 
330    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
331       uint32_t samples = 1 << i;
332 
333       res = create_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline);
334       if (res != VK_SUCCESS)
335          return res;
336 
337       res = create_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline);
338       if (res != VK_SUCCESS)
339          return res;
340 
341       res = create_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline);
342       if (res != VK_SUCCESS)
343          return res;
344 
345       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
346                                                   &state->resolve_compute.depth[i].average_pipeline);
347       if (res != VK_SUCCESS)
348          return res;
349 
350       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
351                                                   &state->resolve_compute.depth[i].max_pipeline);
352       if (res != VK_SUCCESS)
353          return res;
354 
355       res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
356                                                   &state->resolve_compute.depth[i].min_pipeline);
357       if (res != VK_SUCCESS)
358          return res;
359 
360       res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
361                                                   &state->resolve_compute.stencil[i].max_pipeline);
362       if (res != VK_SUCCESS)
363          return res;
364 
365       res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
366                                                   &state->resolve_compute.stencil[i].min_pipeline);
367       if (res != VK_SUCCESS)
368          return res;
369    }
370 
371    res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
372                                                &state->resolve_compute.depth_zero_pipeline);
373    if (res != VK_SUCCESS)
374       return res;
375 
376    return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
377                                                 &state->resolve_compute.stencil_zero_pipeline);
378 }
379 
380 void
radv_device_finish_meta_resolve_compute_state(struct radv_device * device)381 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
382 {
383    struct radv_meta_state *state = &device->meta_state;
384    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
385       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc);
386 
387       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc);
388 
389       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
390 
391       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline,
392                            &state->alloc);
393 
394       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc);
395 
396       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc);
397 
398       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline,
399                            &state->alloc);
400 
401       radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline,
402                            &state->alloc);
403    }
404 
405    radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc);
406 
407    radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc);
408 
409    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
410                                                         &state->alloc);
411    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc);
412 }
413 
414 static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview)415 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
416 {
417    struct radv_device *device = cmd_buffer->device;
418    struct radv_meta_state *state = &device->meta_state;
419    uint32_t samples = src_iview->image->vk.samples;
420    uint32_t samples_log2 = ffs(samples) - 1;
421    VkPipeline *pipeline;
422 
423    if (vk_format_is_int(src_iview->vk.format))
424       pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
425    else if (vk_format_is_srgb(src_iview->vk.format))
426       pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
427    else
428       pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
429 
430    if (!*pipeline) {
431       VkResult ret;
432 
433       ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
434                                     vk_format_is_srgb(src_iview->vk.format), pipeline);
435       if (ret != VK_SUCCESS) {
436          vk_command_buffer_set_error(&cmd_buffer->vk, ret);
437          return NULL;
438       }
439    }
440 
441    return pipeline;
442 }
443 
444 static void
emit_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dst_iview,const VkOffset2D * src_offset,const VkOffset2D * dst_offset,const VkExtent2D * resolve_extent)445 emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
446              const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
447 {
448    struct radv_device *device = cmd_buffer->device;
449    VkPipeline *pipeline;
450 
451    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
452                                  device->meta_state.resolve_compute.p_layout, 0, /* set */
453                                  2,                                              /* descriptorWriteCount */
454                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
455                                                            .dstBinding = 0,
456                                                            .dstArrayElement = 0,
457                                                            .descriptorCount = 1,
458                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
459                                                            .pImageInfo =
460                                                               (VkDescriptorImageInfo[]){
461                                                                  {.sampler = VK_NULL_HANDLE,
462                                                                   .imageView = radv_image_view_to_handle(src_iview),
463                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
464                                                               }},
465                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
466                                                            .dstBinding = 1,
467                                                            .dstArrayElement = 0,
468                                                            .descriptorCount = 1,
469                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
470                                                            .pImageInfo = (VkDescriptorImageInfo[]){
471                                                               {
472                                                                  .sampler = VK_NULL_HANDLE,
473                                                                  .imageView = radv_image_view_to_handle(dst_iview),
474                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
475                                                               },
476                                                            }}});
477 
478    pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
479 
480    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
481 
482    unsigned push_constants[4] = {
483       src_offset->x,
484       src_offset->y,
485       dst_offset->x,
486       dst_offset->y,
487    };
488    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
489                               VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
490    radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
491 }
492 
493 static void
emit_depth_stencil_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dst_iview,const VkOffset2D * resolve_offset,const VkExtent3D * resolve_extent,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)494 emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
495                            struct radv_image_view *dst_iview, const VkOffset2D *resolve_offset,
496                            const VkExtent3D *resolve_extent, VkImageAspectFlags aspects,
497                            VkResolveModeFlagBits resolve_mode)
498 {
499    struct radv_device *device = cmd_buffer->device;
500    const uint32_t samples = src_iview->image->vk.samples;
501    const uint32_t samples_log2 = ffs(samples) - 1;
502    VkPipeline *pipeline;
503 
504    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
505                                  device->meta_state.resolve_compute.p_layout, 0, /* set */
506                                  2,                                              /* descriptorWriteCount */
507                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
508                                                            .dstBinding = 0,
509                                                            .dstArrayElement = 0,
510                                                            .descriptorCount = 1,
511                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
512                                                            .pImageInfo =
513                                                               (VkDescriptorImageInfo[]){
514                                                                  {.sampler = VK_NULL_HANDLE,
515                                                                   .imageView = radv_image_view_to_handle(src_iview),
516                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
517                                                               }},
518                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
519                                                            .dstBinding = 1,
520                                                            .dstArrayElement = 0,
521                                                            .descriptorCount = 1,
522                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
523                                                            .pImageInfo = (VkDescriptorImageInfo[]){
524                                                               {
525                                                                  .sampler = VK_NULL_HANDLE,
526                                                                  .imageView = radv_image_view_to_handle(dst_iview),
527                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
528                                                               },
529                                                            }}});
530 
531    switch (resolve_mode) {
532    case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
533       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
534          pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
535       else
536          pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
537       break;
538    case VK_RESOLVE_MODE_AVERAGE_BIT:
539       assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
540       pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
541       break;
542    case VK_RESOLVE_MODE_MIN_BIT:
543       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
544          pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
545       else
546          pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
547       break;
548    case VK_RESOLVE_MODE_MAX_BIT:
549       if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
550          pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
551       else
552          pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
553       break;
554    default:
555       unreachable("invalid resolve mode");
556    }
557 
558    if (!*pipeline) {
559       int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
560       VkResult ret;
561 
562       ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
563       if (ret != VK_SUCCESS) {
564          vk_command_buffer_set_error(&cmd_buffer->vk, ret);
565          return;
566       }
567    }
568 
569    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
570 
571    uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y};
572 
573    vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
574                               VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants);
575 
576    radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth);
577 }
578 
579 void
radv_meta_resolve_compute_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkFormat src_format,VkImageLayout src_image_layout,struct radv_image * dst_image,VkFormat dst_format,VkImageLayout dst_image_layout,const VkImageResolve2 * region)580 radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format,
581                                 VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format,
582                                 VkImageLayout dst_image_layout, const VkImageResolve2 *region)
583 {
584    struct radv_meta_saved_state saved_state;
585 
586    /* For partial resolves, DCC should be decompressed before resolving
587     * because the metadata is re-initialized to the uncompressed after.
588     */
589    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
590 
591    if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
592        radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
593                                   queue_mask) &&
594        (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
595         region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
596         region->extent.depth != dst_image->vk.extent.depth)) {
597       radv_decompress_dcc(cmd_buffer, dst_image,
598                           &(VkImageSubresourceRange){
599                              .aspectMask = region->dstSubresource.aspectMask,
600                              .baseMipLevel = region->dstSubresource.mipLevel,
601                              .levelCount = 1,
602                              .baseArrayLayer = region->dstSubresource.baseArrayLayer,
603                              .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
604                           });
605    }
606 
607    radv_meta_save(&saved_state, cmd_buffer,
608                   RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
609 
610    assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
611    assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
612    assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
613           vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));
614 
615    const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, &region->dstSubresource, &region->dstOffset);
616 
617    const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
618    const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
619    const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
620    const unsigned src_layer_count = vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource);
621 
622    for (uint32_t layer = 0; layer < src_layer_count; ++layer) {
623 
624       struct radv_image_view src_iview;
625       radv_image_view_init(&src_iview, cmd_buffer->device,
626                            &(VkImageViewCreateInfo){
627                               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
628                               .image = radv_image_to_handle(src_image),
629                               .viewType = VK_IMAGE_VIEW_TYPE_2D,
630                               .format = src_format,
631                               .subresourceRange =
632                                  {
633                                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
634                                     .baseMipLevel = 0,
635                                     .levelCount = 1,
636                                     .baseArrayLayer = region->srcSubresource.baseArrayLayer + layer,
637                                     .layerCount = 1,
638                                  },
639                            },
640                            0, NULL);
641 
642       struct radv_image_view dst_iview;
643       radv_image_view_init(&dst_iview, cmd_buffer->device,
644                            &(VkImageViewCreateInfo){
645                               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
646                               .image = radv_image_to_handle(dst_image),
647                               .viewType = radv_meta_get_view_type(dst_image),
648                               .format = vk_to_non_srgb_format(dst_format),
649                               .subresourceRange =
650                                  {
651                                     .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
652                                     .baseMipLevel = region->dstSubresource.mipLevel,
653                                     .levelCount = 1,
654                                     .baseArrayLayer = dst_base_layer + layer,
655                                     .layerCount = 1,
656                                  },
657                            },
658                            0, NULL);
659 
660       emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
661                    &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height});
662 
663       radv_image_view_finish(&src_iview);
664       radv_image_view_finish(&dst_iview);
665    }
666 
667    radv_meta_restore(&saved_state, cmd_buffer);
668 
669    if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
670        radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
671                                   queue_mask)) {
672 
673       cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
674 
675       VkImageSubresourceRange range = {
676          .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
677          .baseMipLevel = region->dstSubresource.mipLevel,
678          .levelCount = 1,
679          .baseArrayLayer = dst_base_layer,
680          .layerCount = vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource),
681       };
682 
683       cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
684    }
685 }
686 
687 void
radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,VkImageLayout src_layout,struct radv_image_view * dst_iview,VkImageLayout dst_layout,const VkImageResolve2 * region)688 radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
689                                      VkImageLayout src_layout, struct radv_image_view *dst_iview,
690                                      VkImageLayout dst_layout, const VkImageResolve2 *region)
691 {
692    radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
693                                    dst_iview->vk.format, dst_layout, region);
694 
695    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
696                                    radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
697 }
698 
699 void
radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)700 radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
701                                         VkResolveModeFlagBits resolve_mode)
702 {
703    const struct radv_rendering_state *render = &cmd_buffer->state.render;
704    VkRect2D resolve_area = render->area;
705    struct radv_meta_saved_state saved_state;
706 
707    uint32_t layer_count = render->layer_count;
708    if (render->view_mask)
709       layer_count = util_last_bit(render->view_mask);
710 
711    /* Resolves happen before the end-of-subpass barriers get executed, so
712     * we have to make the attachment shader-readable.
713     */
714    cmd_buffer->state.flush_bits |=
715       radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
716       radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) |
717       radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
718 
719    struct radv_image_view *src_iview = render->ds_att.iview;
720    VkImageLayout src_layout =
721       aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.layout : render->ds_att.stencil_layout;
722    struct radv_image *src_image = src_iview->image;
723 
724    VkImageResolve2 region = {0};
725    region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
726    region.srcSubresource.aspectMask = aspects;
727    region.srcSubresource.mipLevel = 0;
728    region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
729    region.srcSubresource.layerCount = layer_count;
730 
731    radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, &region);
732 
733    radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
734 
735    struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
736    VkImageLayout dst_layout =
737       aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.resolve_layout : render->ds_att.stencil_resolve_layout;
738    struct radv_image *dst_image = dst_iview->image;
739 
740    struct radv_image_view tsrc_iview;
741    radv_image_view_init(&tsrc_iview, cmd_buffer->device,
742                         &(VkImageViewCreateInfo){
743                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
744                            .image = radv_image_to_handle(src_image),
745                            .viewType = VK_IMAGE_VIEW_TYPE_2D,
746                            .format = src_iview->vk.format,
747                            .subresourceRange =
748                               {
749                                  .aspectMask = aspects,
750                                  .baseMipLevel = 0,
751                                  .levelCount = 1,
752                                  .baseArrayLayer = src_iview->vk.base_array_layer,
753                                  .layerCount = layer_count,
754                               },
755                         },
756                         0, NULL);
757 
758    struct radv_image_view tdst_iview;
759    radv_image_view_init(&tdst_iview, cmd_buffer->device,
760                         &(VkImageViewCreateInfo){
761                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
762                            .image = radv_image_to_handle(dst_image),
763                            .viewType = radv_meta_get_view_type(dst_image),
764                            .format = dst_iview->vk.format,
765                            .subresourceRange =
766                               {
767                                  .aspectMask = aspects,
768                                  .baseMipLevel = dst_iview->vk.base_mip_level,
769                                  .levelCount = 1,
770                                  .baseArrayLayer = dst_iview->vk.base_array_layer,
771                                  .layerCount = layer_count,
772                               },
773                         },
774                         0, NULL);
775 
776    emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview, &resolve_area.offset,
777                               &(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
778                               aspects, resolve_mode);
779 
780    cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
781                                    radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
782 
783    uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
784 
785    if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_layout, queue_mask)) {
786       VkImageSubresourceRange range = {0};
787       range.aspectMask = aspects;
788       range.baseMipLevel = dst_iview->vk.base_mip_level;
789       range.levelCount = 1;
790       range.baseArrayLayer = dst_iview->vk.base_array_layer;
791       range.layerCount = layer_count;
792 
793       uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);
794 
795       cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
796    }
797 
798    radv_image_view_finish(&tsrc_iview);
799    radv_image_view_finish(&tdst_iview);
800 
801    radv_meta_restore(&saved_state, cmd_buffer);
802 }
803