/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"

static nir_ssa_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
{
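   /* Linear -> sRGB encoding of the resolved color, applied per RGB channel.
    * 0x3b4d2e1c is the float bit pattern of ~0.0031308, the cutoff of the
    * sRGB transfer function: below it the encoded value is linear * 12.92,
    * above it it is 1.055 * pow(linear, 1/2.4) - 0.055. Alpha passes through.
    */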
   unsigned i;

   nir_ssa_def *cmp[3];
   for (i = 0; i < 3; i++)
      cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));

   nir_ssa_def *ltvals[3];
   for (i = 0; i < 3; i++)
      ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));

   nir_ssa_def *gtvals[3];

   for (i = 0; i < 3; i++) {
      gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
      gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
      gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
   }

   nir_ssa_def *comp[4];
   for (i = 0; i < 3; i++)
      comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
   comp[3] = nir_channels(b, input, 1 << 3);
   return nir_vec(b, comp, 4);
}

static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
                                         is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

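   /* Push constants, laid out to match the 16-byte range declared in
    * create_layout() and pushed from emit_resolve(): bytes 0-7 hold the
    * source x/y offset, bytes 8-15 the destination x/y offset.
    */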
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

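   /* The destination is a non-arrayed 2D storage image, so only the x/y
    * components of the store coordinate matter; z/w are left undefined.
    */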
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
                                         nir_channel(&b, dst_coord, 1),
                                         nir_ssa_undef(&b, 1, 32),
                                         nir_ssa_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}

enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};

static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      return "zero";
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      return "average";
   case VK_RESOLVE_MODE_MIN_BIT:
      return "min";
   case VK_RESOLVE_MODE_MAX_BIT:
      return "max";
   default:
      unreachable("invalid resolve mode");
   }
}

static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
                                           VkResolveModeFlagBits resolve_mode)
{
   enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
                                         index == DEPTH_RESOLVE ? "depth" : "stencil",
                                         get_resolve_mode_str(resolve_mode), samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

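   /* Depth/stencil resolves are layered: x/y select the pixel and z selects
    * the array layer of both the source and destination views.
    */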
   nir_ssa_def *img_coord = get_global_ids(&b, 3);

   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_ms_index;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = type;
   tex->is_array = true;
   tex->coord_components = 3;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *outval = &tex->dest.ssa;

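   /* Sample 0 was fetched above; SAMPLE_ZERO mode stops there. The other
    * modes fetch the remaining samples and fold them in with add/min/max,
    * dividing by the sample count at the end for AVERAGE.
    */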
   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
      for (int i = 1; i < samples; i++) {
         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
         tex_add->op = nir_texop_txf_ms;
         tex_add->src[0].src_type = nir_tex_src_coord;
         tex_add->src[0].src = nir_src_for_ssa(img_coord);
         tex_add->src[1].src_type = nir_tex_src_ms_index;
         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
         tex_add->src[2].src_type = nir_tex_src_texture_deref;
         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
         tex_add->dest_type = type;
         tex_add->is_array = true;
         tex_add->coord_components = 3;

         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
         nir_builder_instr_insert(&b, &tex_add->instr);

         switch (resolve_mode) {
         case VK_RESOLVE_MODE_AVERAGE_BIT:
            assert(index == DEPTH_RESOLVE);
            outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MIN_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umin(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MAX_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umax(&b, outval, &tex_add->dest.ssa);
            break;
         default:
            unreachable("invalid resolve mode");
         }
      }

      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
         outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
   }

   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
                                 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
   return b.shader;
}

static VkResult
create_layout(struct radv_device *device)
{
   VkResult result;
   /*
    * Two descriptors: one for the multisampled image being sampled and
    * one for the storage image being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.resolve_compute.ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.resolve_compute.p_layout);
   if (result != VK_SUCCESS)
      goto fail;
   return VK_SUCCESS;
fail:
   return result;
}

static VkResult
create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
                        VkPipeline *pipeline)
{
   VkResult result;

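   /* Pipeline creation is serialized with the meta-state mutex since variants
    * can be requested on demand; if another thread already built this one,
    * there is nothing to do.
    */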
   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs =
      build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      return res;

   if (on_demand)
      return VK_SUCCESS;

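   /* Not on-demand: build every variant up front, one set of pipelines per
    * supported sample count (i is the log2 of the sample count).
    */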
   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      res = create_resolve_pipeline(device, samples, false, false,
                                    &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, true, false,
                                    &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, false, true,
                                    &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(
         device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
         &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;
   }

   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      return res;

   return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
                                                VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                                &state->resolve_compute.stencil_zero_pipeline);
}

void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].average_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].min_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
                        &state->alloc);

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
                        &state->alloc);

   device->vk.dispatch_table.DestroyDescriptorSetLayout(
      radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
                              &state->alloc);
}

static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples = src_iview->image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   if (vk_format_is_int(src_iview->vk.format))
      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
   else if (vk_format_is_srgb(src_iview->vk.format))
      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
   else
      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;

   if (!*pipeline) {
      VkResult ret;

      ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
                                    vk_format_is_srgb(src_iview->vk.format), pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }
   }

   return pipeline;
}

static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
             struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
             const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
   struct radv_device *device = cmd_buffer->device;
   VkPipeline *pipeline;

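   /* Push both descriptors for this resolve: binding 0 is the multisampled
    * source as a sampled image, binding 1 is the single-sample destination as
    * a storage image (matching the layout built in create_layout()).
    */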
   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   unsigned push_constants[4] = {
      src_offset->x,
      src_offset->y,
      dest_offset->x,
      dest_offset->y,
   };
   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         0, 16, push_constants);
   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}

void
radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
                                VkFormat src_format, VkImageLayout src_image_layout,
                                struct radv_image *dest_image, VkFormat dest_format,
                                VkImageLayout dest_image_layout, const VkImageResolve2 *region)
{
   struct radv_meta_saved_state saved_state;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);

   /* For partial resolves, DCC should be decompressed before resolving
    * because the metadata is re-initialized to the uncompressed state
    * afterwards.
    */
   uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
                                                      cmd_buffer->qf);

   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
                                  dest_image_layout, false, queue_mask) &&
       (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
        region->extent.width != dest_image->info.width ||
        region->extent.height != dest_image->info.height ||
        region->extent.depth != dest_image->info.depth)) {
      radv_decompress_dcc(cmd_buffer, dest_image,
                          &(VkImageSubresourceRange){
                             .aspectMask = region->dstSubresource.aspectMask,
                             .baseMipLevel = region->dstSubresource.mipLevel,
                             .levelCount = 1,
                             .baseArrayLayer = region->dstSubresource.baseArrayLayer,
                             .layerCount = region->dstSubresource.layerCount,
                          });
   }

   radv_meta_save(
      &saved_state, cmd_buffer,
      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);

   assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);

   const uint32_t src_base_layer =
      radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);

   const uint32_t dest_base_layer =
      radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);

   const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
   const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dest_image->vk, region->dstOffset);

   for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {

      struct radv_image_view src_iview;
      radv_image_view_init(&src_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(src_image),
                              .viewType = radv_meta_get_view_type(src_image),
                              .format = src_format,
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->srcSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = src_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      struct radv_image_view dest_iview;
      radv_image_view_init(&dest_iview, cmd_buffer->device,
                           &(VkImageViewCreateInfo){
                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                              .image = radv_image_to_handle(dest_image),
                              .viewType = radv_meta_get_view_type(dest_image),
                              .format = vk_to_non_srgb_format(dest_format),
                              .subresourceRange =
                                 {
                                    .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = region->dstSubresource.mipLevel,
                                    .levelCount = 1,
                                    .baseArrayLayer = dest_base_layer + layer,
                                    .layerCount = 1,
                                 },
                           },
                           0, NULL);

      emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
                   &(VkOffset2D){dstOffset.x, dstOffset.y},
                   &(VkExtent2D){extent.width, extent.height});

      radv_image_view_finish(&src_iview);
      radv_image_view_finish(&dest_iview);
   }

   radv_meta_restore(&saved_state, cmd_buffer);

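   /* The compute resolve wrote the destination without going through DCC
    * (unless DCC image stores are supported), so if the destination stays
    * compressed in this layout, flush the CS writes and re-initialize DCC
    * metadata for the written range.
    */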
   if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
       radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
                                  dest_image_layout, false, queue_mask)) {

      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;

      VkImageSubresourceRange range = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .baseMipLevel = region->dstSubresource.mipLevel,
         .levelCount = 1,
         .baseArrayLayer = dest_base_layer,
         .layerCount = region->dstSubresource.layerCount,
      };

      cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
   }
}

/**
 * Emit any needed resolves for the current subpass.
 */
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_subpass_barrier barrier;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   barrier.src_stage_mask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
   barrier.src_access_mask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
   barrier.dst_access_mask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
   radv_emit_subpass_barrier(cmd_buffer, &barrier);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
      struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];

      if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
         continue;

      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;

      VkImageResolve2 region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
         .extent = (VkExtent3D){fb->width, fb->height, 1},
         .srcSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = src_iview->vk.base_mip_level,
               .baseArrayLayer = src_iview->vk.base_array_layer,
               .layerCount = layer_count,
            },
         .dstSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = dst_iview->vk.base_mip_level,
               .baseArrayLayer = dst_iview->vk.base_array_layer,
               .layerCount = layer_count,
            },
         .srcOffset = (VkOffset3D){0, 0, 0},
         .dstOffset = (VkOffset3D){0, 0, 0},
      };

      radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format,
                                      src_att.layout, dst_iview->image, dst_iview->vk.format,
                                      dst_att.layout, &region);
   }

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}

void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlags aspects,
                                      VkResolveModeFlagBits resolve_mode)
{
   struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
   struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
   struct radv_image *src_image = src_iview->image;

   VkImageResolve2 region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
   region.srcSubresource.layerCount = layer_count;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
   struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
   struct radv_image *dst_image = dst_iview->image;

   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = radv_meta_get_view_type(src_image),
                           .format = src_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = src_iview->vk.base_mip_level,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk.format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->vk.base_mip_level,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->vk.base_array_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        0, NULL);

   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
                              &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
                              resolve_mode);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);

   VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
                                                      cmd_buffer->qf);

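   /* The compute resolve wrote the destination without updating HTILE, so if
    * the destination is expected to stay HTILE-compressed in its current
    * layout, re-initialize HTILE for the resolved range.
    */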
   if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->vk.base_mip_level;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->vk.base_array_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_image_view_finish(&tsrc_iview);
   radv_image_view_finish(&tdst_iview);

   radv_meta_restore(&saved_state, cmd_buffer);
}
938