1 /*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "nir/nir_builder.h"
28 #include "nir/nir_format_convert.h"
29
30 #include "radv_meta.h"
31 #include "radv_private.h"
32 #include "sid.h"
33 #include "vk_common_entrypoints.h"
34 #include "vk_format.h"
35
36 static nir_def *
radv_meta_build_resolve_srgb_conversion(nir_builder * b,nir_def * input)37 radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_def *input)
38 {
39 unsigned i;
40 nir_def *comp[4];
41 for (i = 0; i < 3; i++)
42 comp[i] = nir_format_linear_to_srgb(b, nir_channel(b, input, i));
43 comp[3] = nir_channels(b, input, 1 << 3);
44 return nir_vec(b, comp, 4);
45 }
46
/* Build the compute shader used for color MSAA resolves.
 *
 * The shader reads a 2D multisampled texture (set 0, binding 0), combines
 * all `samples` via the shared resolve core helper, optionally applies a
 * linear->sRGB conversion, and stores the result to a 2D storage image
 * (set 0, binding 1).
 *
 * Push constants (16 bytes): src_offset at bytes 0-7, dst_offset at
 * bytes 8-15 (two 32-bit x/y pairs).
 */
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   /* Integer formats are fetched as uint; everything else as float. */
   enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
                                         is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   /* One invocation per pixel, dispatched in 8x8 tiles. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   /* Offsets of the resolve area in the source and destination images. */
   nir_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   /* Shared helper: fetches and merges all samples at src_coord into `color`. */
   radv_meta_build_resolve_shader_core(dev, &b, is_integer, samples, input_img, color, src_coord);

   nir_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   /* Image stores take a vec4 coordinate; the trailing components are
    * unused for a non-array 2D image.
    */
   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1), nir_undef(&b, 1, 32),
                                 nir_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}
89
/* Selects the depth or stencil variant of the depth/stencil resolve
 * shaders and pipelines.
 */
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};
94
95 static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)96 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
97 {
98 switch (resolve_mode) {
99 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
100 return "zero";
101 case VK_RESOLVE_MODE_AVERAGE_BIT:
102 return "average";
103 case VK_RESOLVE_MODE_MIN_BIT:
104 return "min";
105 case VK_RESOLVE_MODE_MAX_BIT:
106 return "max";
107 default:
108 unreachable("invalid resolve mode");
109 }
110 }
111
/* Build the compute shader used for depth/stencil MSAA resolves.
 *
 * `index` selects DEPTH_RESOLVE (float data) or STENCIL_RESOLVE (uint data);
 * `resolve_mode` selects how samples are combined: sample-zero, average
 * (depth only), min or max. The input is a 2D-array multisampled texture
 * (set 0, binding 0) and the output a 2D-array storage image (set 0,
 * binding 1); the Z invocation coordinate selects the array layer.
 * Push constants (8 bytes) hold the x/y offset of the resolve area.
 */
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
                                           VkResolveModeFlagBits resolve_mode)
{
   enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);

   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
                            index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
   /* One invocation per pixel per layer, dispatched in 8x8x1 tiles. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 3);

   /* x/y offset of the resolve area (push constants, bytes 0-7). */
   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);

   nir_def *resolve_coord = nir_iadd(&b, nir_trim_vector(&b, global_id, 2), offset);

   /* x/y within the resolve area, z is the array layer. */
   nir_def *img_coord =
      nir_vec3(&b, nir_channel(&b, resolve_coord, 0), nir_channel(&b, resolve_coord, 1), nir_channel(&b, global_id, 2));

   nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);
   /* Start from sample 0; SAMPLE_ZERO mode stops here. */
   nir_def *outval = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, 0));

   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
      /* Fold the remaining samples into outval. */
      for (int i = 1; i < samples; i++) {
         nir_def *si = nir_txf_ms_deref(&b, input_img_deref, img_coord, nir_imm_int(&b, i));

         switch (resolve_mode) {
         case VK_RESOLVE_MODE_AVERAGE_BIT:
            /* Averaging only makes sense for float depth data. */
            assert(index == DEPTH_RESOLVE);
            outval = nir_fadd(&b, outval, si);
            break;
         case VK_RESOLVE_MODE_MIN_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmin(&b, outval, si);
            else
               outval = nir_umin(&b, outval, si);
            break;
         case VK_RESOLVE_MODE_MAX_BIT:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmax(&b, outval, si);
            else
               outval = nir_umax(&b, outval, si);
            break;
         default:
            unreachable("invalid resolve mode");
         }
      }

      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
         outval = nir_fdiv_imm(&b, outval, samples);
   }

   /* Image stores take a vec4 coordinate; the 4th component is unused. */
   nir_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
                             nir_channel(&b, img_coord, 2), nir_undef(&b, 1, 32));
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
   return b.shader;
}
182
183 static VkResult
create_layout(struct radv_device * device)184 create_layout(struct radv_device *device)
185 {
186 VkResult result;
187 /*
188 * two descriptors one for the image being sampled
189 * one for the buffer being written.
190 */
191 VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
192 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
193 .bindingCount = 2,
194 .pBindings = (VkDescriptorSetLayoutBinding[]){
195 {.binding = 0,
196 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
197 .descriptorCount = 1,
198 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
199 .pImmutableSamplers = NULL},
200 {.binding = 1,
201 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
202 .descriptorCount = 1,
203 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
204 .pImmutableSamplers = NULL},
205 }};
206
207 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
208 &device->meta_state.resolve_compute.ds_layout);
209 if (result != VK_SUCCESS)
210 goto fail;
211
212 VkPipelineLayoutCreateInfo pl_create_info = {
213 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
214 .setLayoutCount = 1,
215 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
216 .pushConstantRangeCount = 1,
217 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
218 };
219
220 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
221 &device->meta_state.resolve_compute.p_layout);
222 if (result != VK_SUCCESS)
223 goto fail;
224 return VK_SUCCESS;
225 fail:
226 return result;
227 }
228
229 static VkResult
create_resolve_pipeline(struct radv_device * device,int samples,bool is_integer,bool is_srgb,VkPipeline * pipeline)230 create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb, VkPipeline *pipeline)
231 {
232 VkResult result;
233
234 mtx_lock(&device->meta_state.mtx);
235 if (*pipeline) {
236 mtx_unlock(&device->meta_state.mtx);
237 return VK_SUCCESS;
238 }
239
240 nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
241
242 /* compute shader */
243
244 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
245 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
246 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
247 .module = vk_shader_module_handle_from_nir(cs),
248 .pName = "main",
249 .pSpecializationInfo = NULL,
250 };
251
252 VkComputePipelineCreateInfo vk_pipeline_info = {
253 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
254 .stage = pipeline_shader_stage,
255 .flags = 0,
256 .layout = device->meta_state.resolve_compute.p_layout,
257 };
258
259 result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
260 NULL, pipeline);
261 if (result != VK_SUCCESS)
262 goto fail;
263
264 ralloc_free(cs);
265 mtx_unlock(&device->meta_state.mtx);
266 return VK_SUCCESS;
267 fail:
268 ralloc_free(cs);
269 mtx_unlock(&device->meta_state.mtx);
270 return result;
271 }
272
273 static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device * device,int samples,int index,VkResolveModeFlagBits resolve_mode,VkPipeline * pipeline)274 create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
275 VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
276 {
277 VkResult result;
278
279 mtx_lock(&device->meta_state.mtx);
280 if (*pipeline) {
281 mtx_unlock(&device->meta_state.mtx);
282 return VK_SUCCESS;
283 }
284
285 nir_shader *cs = build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
286
287 /* compute shader */
288 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
289 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
290 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
291 .module = vk_shader_module_handle_from_nir(cs),
292 .pName = "main",
293 .pSpecializationInfo = NULL,
294 };
295
296 VkComputePipelineCreateInfo vk_pipeline_info = {
297 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
298 .stage = pipeline_shader_stage,
299 .flags = 0,
300 .layout = device->meta_state.resolve_compute.p_layout,
301 };
302
303 result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
304 NULL, pipeline);
305 if (result != VK_SUCCESS)
306 goto fail;
307
308 ralloc_free(cs);
309 mtx_unlock(&device->meta_state.mtx);
310 return VK_SUCCESS;
311 fail:
312 ralloc_free(cs);
313 mtx_unlock(&device->meta_state.mtx);
314 return result;
315 }
316
/* Initialize the compute-resolve meta state: create the shared descriptor
 * and pipeline layouts and, unless `on_demand` is set, pre-build every
 * pipeline variant per supported sample count. In on-demand mode, pipeline
 * creation is deferred to first use.
 */
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      return res;

   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      /* Color resolve pipelines: float, integer and sRGB variants. */
      res = create_resolve_pipeline(device, samples, false, false, &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, true, false, &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, false, true, &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Depth resolve pipelines: average, max and min modes. */
      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
                                                  &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Stencil resolve pipelines: max and min only (no average for stencil;
       * see the assert in the shader builder).
       */
      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE, VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;
   }

   /* SAMPLE_ZERO pipelines only read sample 0, so a single variant
    * (samples == 0) is shared across all sample counts.
    */
   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      return res;

   return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE, VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                                &state->resolve_compute.stencil_zero_pipeline);
}
379
380 void
radv_device_finish_meta_resolve_compute_state(struct radv_device * device)381 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
382 {
383 struct radv_meta_state *state = &device->meta_state;
384 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
385 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline, &state->alloc);
386
387 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline, &state->alloc);
388
389 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
390
391 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].average_pipeline,
392 &state->alloc);
393
394 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].max_pipeline, &state->alloc);
395
396 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth[i].min_pipeline, &state->alloc);
397
398 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].max_pipeline,
399 &state->alloc);
400
401 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil[i].min_pipeline,
402 &state->alloc);
403 }
404
405 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline, &state->alloc);
406
407 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline, &state->alloc);
408
409 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
410 &state->alloc);
411 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout, &state->alloc);
412 }
413
414 static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview)415 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
416 {
417 struct radv_device *device = cmd_buffer->device;
418 struct radv_meta_state *state = &device->meta_state;
419 uint32_t samples = src_iview->image->vk.samples;
420 uint32_t samples_log2 = ffs(samples) - 1;
421 VkPipeline *pipeline;
422
423 if (vk_format_is_int(src_iview->vk.format))
424 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
425 else if (vk_format_is_srgb(src_iview->vk.format))
426 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
427 else
428 pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
429
430 if (!*pipeline) {
431 VkResult ret;
432
433 ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
434 vk_format_is_srgb(src_iview->vk.format), pipeline);
435 if (ret != VK_SUCCESS) {
436 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
437 return NULL;
438 }
439 }
440
441 return pipeline;
442 }
443
444 static void
emit_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dst_iview,const VkOffset2D * src_offset,const VkOffset2D * dst_offset,const VkExtent2D * resolve_extent)445 emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview, struct radv_image_view *dst_iview,
446 const VkOffset2D *src_offset, const VkOffset2D *dst_offset, const VkExtent2D *resolve_extent)
447 {
448 struct radv_device *device = cmd_buffer->device;
449 VkPipeline *pipeline;
450
451 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
452 device->meta_state.resolve_compute.p_layout, 0, /* set */
453 2, /* descriptorWriteCount */
454 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
455 .dstBinding = 0,
456 .dstArrayElement = 0,
457 .descriptorCount = 1,
458 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
459 .pImageInfo =
460 (VkDescriptorImageInfo[]){
461 {.sampler = VK_NULL_HANDLE,
462 .imageView = radv_image_view_to_handle(src_iview),
463 .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
464 }},
465 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
466 .dstBinding = 1,
467 .dstArrayElement = 0,
468 .descriptorCount = 1,
469 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
470 .pImageInfo = (VkDescriptorImageInfo[]){
471 {
472 .sampler = VK_NULL_HANDLE,
473 .imageView = radv_image_view_to_handle(dst_iview),
474 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
475 },
476 }}});
477
478 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
479
480 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
481
482 unsigned push_constants[4] = {
483 src_offset->x,
484 src_offset->y,
485 dst_offset->x,
486 dst_offset->y,
487 };
488 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
489 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
490 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
491 }
492
/* Record a depth/stencil compute resolve of `resolve_extent` pixels/layers
 * from src_iview into dst_iview: push the two image descriptors, select
 * (creating on demand if needed) the pipeline matching the aspect and
 * resolve mode, push the area offset and dispatch one invocation per pixel
 * per layer.
 */
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dst_iview, const VkOffset2D *resolve_offset,
                           const VkExtent3D *resolve_extent, VkImageAspectFlags aspects,
                           VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->vk.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.resolve_compute.p_layout, 0, /* set */
                                 2, /* descriptorWriteCount */
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {.sampler = VK_NULL_HANDLE,
                                                                  .imageView = radv_image_view_to_handle(src_iview),
                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst_iview),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});

   /* Pick the cached pipeline slot for this aspect + resolve mode. */
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      /* Sample-zero pipelines are shared across sample counts. */
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      /* Average mode is only valid for the depth aspect. */
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   /* Build the pipeline now if the device was initialized on-demand. */
   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         vk_command_buffer_set_error(&cmd_buffer->vk, ret);
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);

   /* Only the 8-byte area offset is needed for depth/stencil resolves. */
   uint32_t push_constants[2] = {resolve_offset->x, resolve_offset->y};

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.resolve_compute.p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), push_constants);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, resolve_extent->depth);
}
578
579 void
radv_meta_resolve_compute_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkFormat src_format,VkImageLayout src_image_layout,struct radv_image * dst_image,VkFormat dst_format,VkImageLayout dst_image_layout,const VkImageResolve2 * region)580 radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image, VkFormat src_format,
581 VkImageLayout src_image_layout, struct radv_image *dst_image, VkFormat dst_format,
582 VkImageLayout dst_image_layout, const VkImageResolve2 *region)
583 {
584 struct radv_meta_saved_state saved_state;
585
586 /* For partial resolves, DCC should be decompressed before resolving
587 * because the metadata is re-initialized to the uncompressed after.
588 */
589 uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
590
591 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
592 radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
593 queue_mask) &&
594 (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
595 region->extent.width != dst_image->vk.extent.width || region->extent.height != dst_image->vk.extent.height ||
596 region->extent.depth != dst_image->vk.extent.depth)) {
597 radv_decompress_dcc(cmd_buffer, dst_image,
598 &(VkImageSubresourceRange){
599 .aspectMask = region->dstSubresource.aspectMask,
600 .baseMipLevel = region->dstSubresource.mipLevel,
601 .levelCount = 1,
602 .baseArrayLayer = region->dstSubresource.baseArrayLayer,
603 .layerCount = vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource),
604 });
605 }
606
607 radv_meta_save(&saved_state, cmd_buffer,
608 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
609
610 assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
611 assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
612 assert(vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource) ==
613 vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource));
614
615 const uint32_t dst_base_layer = radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset);
616
617 const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
618 const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
619 const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
620 const unsigned src_layer_count = vk_image_subresource_layer_count(&src_image->vk, ®ion->srcSubresource);
621
622 for (uint32_t layer = 0; layer < src_layer_count; ++layer) {
623
624 struct radv_image_view src_iview;
625 radv_image_view_init(&src_iview, cmd_buffer->device,
626 &(VkImageViewCreateInfo){
627 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
628 .image = radv_image_to_handle(src_image),
629 .viewType = VK_IMAGE_VIEW_TYPE_2D,
630 .format = src_format,
631 .subresourceRange =
632 {
633 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
634 .baseMipLevel = 0,
635 .levelCount = 1,
636 .baseArrayLayer = region->srcSubresource.baseArrayLayer + layer,
637 .layerCount = 1,
638 },
639 },
640 0, NULL);
641
642 struct radv_image_view dst_iview;
643 radv_image_view_init(&dst_iview, cmd_buffer->device,
644 &(VkImageViewCreateInfo){
645 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
646 .image = radv_image_to_handle(dst_image),
647 .viewType = radv_meta_get_view_type(dst_image),
648 .format = vk_to_non_srgb_format(dst_format),
649 .subresourceRange =
650 {
651 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
652 .baseMipLevel = region->dstSubresource.mipLevel,
653 .levelCount = 1,
654 .baseArrayLayer = dst_base_layer + layer,
655 .layerCount = 1,
656 },
657 },
658 0, NULL);
659
660 emit_resolve(cmd_buffer, &src_iview, &dst_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
661 &(VkOffset2D){dstOffset.x, dstOffset.y}, &(VkExtent2D){extent.width, extent.height});
662
663 radv_image_view_finish(&src_iview);
664 radv_image_view_finish(&dst_iview);
665 }
666
667 radv_meta_restore(&saved_state, cmd_buffer);
668
669 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dst_image) &&
670 radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel, dst_image_layout,
671 queue_mask)) {
672
673 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
674
675 VkImageSubresourceRange range = {
676 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
677 .baseMipLevel = region->dstSubresource.mipLevel,
678 .levelCount = 1,
679 .baseArrayLayer = dst_base_layer,
680 .layerCount = vk_image_subresource_layer_count(&dst_image->vk, ®ion->dstSubresource),
681 };
682
683 cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);
684 }
685 }
686
/* Resolve the given color attachment of the current rendering pass with the
 * compute path, then flush/invalidate so the shader writes are visible to
 * subsequent accesses.
 */
void
radv_cmd_buffer_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                                     VkImageLayout src_layout, struct radv_image_view *dst_iview,
                                     VkImageLayout dst_layout, const VkImageResolve2 *region)
{
   radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format, src_layout, dst_iview->image,
                                   dst_iview->vk.format, dst_layout, region);

   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
                                   radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
}
698
699 void
radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)700 radv_depth_stencil_resolve_rendering_cs(struct radv_cmd_buffer *cmd_buffer, VkImageAspectFlags aspects,
701 VkResolveModeFlagBits resolve_mode)
702 {
703 const struct radv_rendering_state *render = &cmd_buffer->state.render;
704 VkRect2D resolve_area = render->area;
705 struct radv_meta_saved_state saved_state;
706
707 uint32_t layer_count = render->layer_count;
708 if (render->view_mask)
709 layer_count = util_last_bit(render->view_mask);
710
711 /* Resolves happen before the end-of-subpass barriers get executed, so
712 * we have to make the attachment shader-readable.
713 */
714 cmd_buffer->state.flush_bits |=
715 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
716 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) |
717 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
718
719 struct radv_image_view *src_iview = render->ds_att.iview;
720 VkImageLayout src_layout =
721 aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.layout : render->ds_att.stencil_layout;
722 struct radv_image *src_image = src_iview->image;
723
724 VkImageResolve2 region = {0};
725 region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
726 region.srcSubresource.aspectMask = aspects;
727 region.srcSubresource.mipLevel = 0;
728 region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
729 region.srcSubresource.layerCount = layer_count;
730
731 radv_decompress_resolve_src(cmd_buffer, src_image, src_layout, ®ion);
732
733 radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
734
735 struct radv_image_view *dst_iview = render->ds_att.resolve_iview;
736 VkImageLayout dst_layout =
737 aspects & VK_IMAGE_ASPECT_DEPTH_BIT ? render->ds_att.resolve_layout : render->ds_att.stencil_resolve_layout;
738 struct radv_image *dst_image = dst_iview->image;
739
740 struct radv_image_view tsrc_iview;
741 radv_image_view_init(&tsrc_iview, cmd_buffer->device,
742 &(VkImageViewCreateInfo){
743 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
744 .image = radv_image_to_handle(src_image),
745 .viewType = VK_IMAGE_VIEW_TYPE_2D,
746 .format = src_iview->vk.format,
747 .subresourceRange =
748 {
749 .aspectMask = aspects,
750 .baseMipLevel = 0,
751 .levelCount = 1,
752 .baseArrayLayer = src_iview->vk.base_array_layer,
753 .layerCount = layer_count,
754 },
755 },
756 0, NULL);
757
758 struct radv_image_view tdst_iview;
759 radv_image_view_init(&tdst_iview, cmd_buffer->device,
760 &(VkImageViewCreateInfo){
761 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
762 .image = radv_image_to_handle(dst_image),
763 .viewType = radv_meta_get_view_type(dst_image),
764 .format = dst_iview->vk.format,
765 .subresourceRange =
766 {
767 .aspectMask = aspects,
768 .baseMipLevel = dst_iview->vk.base_mip_level,
769 .levelCount = 1,
770 .baseArrayLayer = dst_iview->vk.base_array_layer,
771 .layerCount = layer_count,
772 },
773 },
774 0, NULL);
775
776 emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview, &resolve_area.offset,
777 &(VkExtent3D){resolve_area.extent.width, resolve_area.extent.height, layer_count},
778 aspects, resolve_mode);
779
780 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
781 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
782
783 uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf, cmd_buffer->qf);
784
785 if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, dst_layout, queue_mask)) {
786 VkImageSubresourceRange range = {0};
787 range.aspectMask = aspects;
788 range.baseMipLevel = dst_iview->vk.base_mip_level;
789 range.levelCount = 1;
790 range.baseArrayLayer = dst_iview->vk.base_array_layer;
791 range.layerCount = layer_count;
792
793 uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);
794
795 cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
796 }
797
798 radv_image_view_finish(&tsrc_iview);
799 radv_image_view_finish(&tdst_iview);
800
801 radv_meta_restore(&saved_state, cmd_buffer);
802 }
803