1 /*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26
27 #include "nir/nir_builder.h"
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "sid.h"
31 #include "vk_format.h"
32
33 static nir_ssa_def *
radv_meta_build_resolve_srgb_conversion(nir_builder * b,nir_ssa_def * input)34 radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
35 {
36 unsigned i;
37
38 nir_ssa_def *cmp[3];
39 for (i = 0; i < 3; i++)
40 cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));
41
42 nir_ssa_def *ltvals[3];
43 for (i = 0; i < 3; i++)
44 ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));
45
46 nir_ssa_def *gtvals[3];
47
48 for (i = 0; i < 3; i++) {
49 gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
50 gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
51 gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
52 }
53
54 nir_ssa_def *comp[4];
55 for (i = 0; i < 3; i++)
56 comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
57 comp[3] = nir_channels(b, input, 1 << 3);
58 return nir_vec(b, comp, 4);
59 }
60
/* Build the compute shader used for color resolves.
 *
 * The shader reads one texel per invocation from a multisampled sampled
 * image (set 0, binding 0), resolves the samples (see
 * radv_meta_build_resolve_shader_core), optionally converts the result to
 * sRGB, and stores it to a single-sampled storage image (set 0, binding 1).
 *
 * Push constants (16 bytes): source offset (x, y) at byte 0, destination
 * offset (x, y) at byte 8.
 */
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   /* Integer formats must be resolved with integer fetches; everything else
    * (including sRGB, which is resolved in linear space) uses float. */
   enum glsl_base_type img_base_type = is_integer ? GLSL_TYPE_UINT : GLSL_TYPE_FLOAT;
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, img_base_type);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, img_base_type);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs-%d-%s", samples,
                                         is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   /* One invocation per pixel, dispatched in 8x8 workgroups. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *global_id = get_global_ids(&b, 2);

   /* Load the src/dst offsets pushed by emit_resolve(). */
   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   /* Shared helper emits the per-sample fetch + average/copy logic and
    * writes the resolved value into 'color'. */
   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, src_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   /* Image stores take a vec4 coordinate; only x/y are meaningful for 2D. */
   nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
                                     nir_channel(&b, dst_coord, 1),
                                     nir_ssa_undef(&b, 1, 32),
                                     nir_ssa_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
                         .image_dim = GLSL_SAMPLER_DIM_2D);
   return b.shader;
}
107
/* Selects which aspect a depth/stencil resolve shader/pipeline handles. */
enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};
112
113 static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)114 get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
115 {
116 switch (resolve_mode) {
117 case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
118 return "zero";
119 case VK_RESOLVE_MODE_AVERAGE_BIT:
120 return "average";
121 case VK_RESOLVE_MODE_MIN_BIT:
122 return "min";
123 case VK_RESOLVE_MODE_MAX_BIT:
124 return "max";
125 default:
126 unreachable("invalid resolve mode");
127 }
128 }
129
130 static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device * dev,int samples,int index,VkResolveModeFlagBits resolve_mode)131 build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
132 VkResolveModeFlagBits resolve_mode)
133 {
134 enum glsl_base_type img_base_type = index == DEPTH_RESOLVE ? GLSL_TYPE_FLOAT : GLSL_TYPE_UINT;
135 const struct glsl_type *sampler_type =
136 glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, img_base_type);
137 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, img_base_type);
138
139 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_resolve_cs_%s-%s-%d",
140 index == DEPTH_RESOLVE ? "depth" : "stencil",
141 get_resolve_mode_str(resolve_mode), samples);
142 b.shader->info.workgroup_size[0] = 8;
143 b.shader->info.workgroup_size[1] = 8;
144
145 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
146 input_img->data.descriptor_set = 0;
147 input_img->data.binding = 0;
148
149 nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
150 output_img->data.descriptor_set = 0;
151 output_img->data.binding = 1;
152
153 nir_ssa_def *img_coord = get_global_ids(&b, 3);
154
155 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
156
157 nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;
158
159 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
160 tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
161 tex->op = nir_texop_txf_ms;
162 tex->src[0].src_type = nir_tex_src_coord;
163 tex->src[0].src = nir_src_for_ssa(img_coord);
164 tex->src[1].src_type = nir_tex_src_ms_index;
165 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
166 tex->src[2].src_type = nir_tex_src_texture_deref;
167 tex->src[2].src = nir_src_for_ssa(input_img_deref);
168 tex->dest_type = type;
169 tex->is_array = true;
170 tex->coord_components = 3;
171
172 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
173 nir_builder_instr_insert(&b, &tex->instr);
174
175 nir_ssa_def *outval = &tex->dest.ssa;
176
177 if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT) {
178 for (int i = 1; i < samples; i++) {
179 nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
180 tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
181 tex_add->op = nir_texop_txf_ms;
182 tex_add->src[0].src_type = nir_tex_src_coord;
183 tex_add->src[0].src = nir_src_for_ssa(img_coord);
184 tex_add->src[1].src_type = nir_tex_src_ms_index;
185 tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
186 tex_add->src[2].src_type = nir_tex_src_texture_deref;
187 tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
188 tex_add->dest_type = type;
189 tex_add->is_array = true;
190 tex_add->coord_components = 3;
191
192 nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
193 nir_builder_instr_insert(&b, &tex_add->instr);
194
195 switch (resolve_mode) {
196 case VK_RESOLVE_MODE_AVERAGE_BIT:
197 assert(index == DEPTH_RESOLVE);
198 outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
199 break;
200 case VK_RESOLVE_MODE_MIN_BIT:
201 if (index == DEPTH_RESOLVE)
202 outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
203 else
204 outval = nir_umin(&b, outval, &tex_add->dest.ssa);
205 break;
206 case VK_RESOLVE_MODE_MAX_BIT:
207 if (index == DEPTH_RESOLVE)
208 outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
209 else
210 outval = nir_umax(&b, outval, &tex_add->dest.ssa);
211 break;
212 default:
213 unreachable("invalid resolve mode");
214 }
215 }
216
217 if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT)
218 outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
219 }
220
221 nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
222 nir_channel(&b, img_coord, 2), nir_ssa_undef(&b, 1, 32));
223 nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
224 nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
225 .image_dim = GLSL_SAMPLER_DIM_2D, .image_array = true);
226 return b.shader;
227 }
228
229 static VkResult
create_layout(struct radv_device * device)230 create_layout(struct radv_device *device)
231 {
232 VkResult result;
233 /*
234 * two descriptors one for the image being sampled
235 * one for the buffer being written.
236 */
237 VkDescriptorSetLayoutCreateInfo ds_create_info = {
238 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
239 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
240 .bindingCount = 2,
241 .pBindings = (VkDescriptorSetLayoutBinding[]){
242 {.binding = 0,
243 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
244 .descriptorCount = 1,
245 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
246 .pImmutableSamplers = NULL},
247 {.binding = 1,
248 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
249 .descriptorCount = 1,
250 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
251 .pImmutableSamplers = NULL},
252 }};
253
254 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
255 &device->meta_state.alloc,
256 &device->meta_state.resolve_compute.ds_layout);
257 if (result != VK_SUCCESS)
258 goto fail;
259
260 VkPipelineLayoutCreateInfo pl_create_info = {
261 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
262 .setLayoutCount = 1,
263 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
264 .pushConstantRangeCount = 1,
265 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
266 };
267
268 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
269 &device->meta_state.alloc,
270 &device->meta_state.resolve_compute.p_layout);
271 if (result != VK_SUCCESS)
272 goto fail;
273 return VK_SUCCESS;
274 fail:
275 return result;
276 }
277
278 static VkResult
create_resolve_pipeline(struct radv_device * device,int samples,bool is_integer,bool is_srgb,VkPipeline * pipeline)279 create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
280 VkPipeline *pipeline)
281 {
282 VkResult result;
283
284 mtx_lock(&device->meta_state.mtx);
285 if (*pipeline) {
286 mtx_unlock(&device->meta_state.mtx);
287 return VK_SUCCESS;
288 }
289
290 nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
291
292 /* compute shader */
293
294 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
295 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
296 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
297 .module = vk_shader_module_handle_from_nir(cs),
298 .pName = "main",
299 .pSpecializationInfo = NULL,
300 };
301
302 VkComputePipelineCreateInfo vk_pipeline_info = {
303 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
304 .stage = pipeline_shader_stage,
305 .flags = 0,
306 .layout = device->meta_state.resolve_compute.p_layout,
307 };
308
309 result = radv_CreateComputePipelines(radv_device_to_handle(device),
310 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
311 &vk_pipeline_info, NULL, pipeline);
312 if (result != VK_SUCCESS)
313 goto fail;
314
315 ralloc_free(cs);
316 mtx_unlock(&device->meta_state.mtx);
317 return VK_SUCCESS;
318 fail:
319 ralloc_free(cs);
320 mtx_unlock(&device->meta_state.mtx);
321 return result;
322 }
323
324 static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device * device,int samples,int index,VkResolveModeFlagBits resolve_mode,VkPipeline * pipeline)325 create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
326 VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
327 {
328 VkResult result;
329
330 mtx_lock(&device->meta_state.mtx);
331 if (*pipeline) {
332 mtx_unlock(&device->meta_state.mtx);
333 return VK_SUCCESS;
334 }
335
336 nir_shader *cs =
337 build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);
338
339 /* compute shader */
340 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
341 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
342 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
343 .module = vk_shader_module_handle_from_nir(cs),
344 .pName = "main",
345 .pSpecializationInfo = NULL,
346 };
347
348 VkComputePipelineCreateInfo vk_pipeline_info = {
349 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
350 .stage = pipeline_shader_stage,
351 .flags = 0,
352 .layout = device->meta_state.resolve_compute.p_layout,
353 };
354
355 result = radv_CreateComputePipelines(radv_device_to_handle(device),
356 radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
357 &vk_pipeline_info, NULL, pipeline);
358 if (result != VK_SUCCESS)
359 goto fail;
360
361 ralloc_free(cs);
362 mtx_unlock(&device->meta_state.mtx);
363 return VK_SUCCESS;
364 fail:
365 ralloc_free(cs);
366 mtx_unlock(&device->meta_state.mtx);
367 return result;
368 }
369
/* Initialize the compute-resolve meta state: create the shared layouts and,
 * unless 'on_demand' is set, eagerly build every resolve pipeline variant
 * (color float/int/srgb, depth/stencil average/min/max per sample count,
 * plus the sample-count-independent SAMPLE_ZERO pipelines). */
VkResult
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
{
   struct radv_meta_state *state = &device->meta_state;
   VkResult res;

   res = create_layout(device);
   if (res != VK_SUCCESS)
      return res;

   /* With on-demand compilation, pipelines are built lazily at first use
    * (see radv_get_resolve_pipeline / emit_depth_stencil_resolve). */
   if (on_demand)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      uint32_t samples = 1 << i;

      /* Color resolve variants: float, integer, sRGB. */
      res = create_resolve_pipeline(device, samples, false, false,
                                    &state->resolve_compute.rc[i].pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, true, false,
                                    &state->resolve_compute.rc[i].i_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_resolve_pipeline(device, samples, false, true,
                                    &state->resolve_compute.rc[i].srgb_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Depth: average/max/min. (AVERAGE is depth-only; stencil values
       * cannot be meaningfully averaged.) */
      res = create_depth_stencil_resolve_pipeline(
         device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT,
         &state->resolve_compute.depth[i].average_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.depth[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.depth[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;

      /* Stencil: max/min only. */
      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MAX_BIT,
                                                  &state->resolve_compute.stencil[i].max_pipeline);
      if (res != VK_SUCCESS)
         return res;

      res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
                                                  VK_RESOLVE_MODE_MIN_BIT,
                                                  &state->resolve_compute.stencil[i].min_pipeline);
      if (res != VK_SUCCESS)
         return res;
   }

   /* SAMPLE_ZERO only ever reads sample 0, so these pipelines do not depend
    * on the sample count (samples is passed as 0). */
   res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
                                               VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                               &state->resolve_compute.depth_zero_pipeline);
   if (res != VK_SUCCESS)
      return res;

   return create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
                                                VK_RESOLVE_MODE_SAMPLE_ZERO_BIT,
                                                &state->resolve_compute.stencil_zero_pipeline);
}
442
443 void
radv_device_finish_meta_resolve_compute_state(struct radv_device * device)444 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
445 {
446 struct radv_meta_state *state = &device->meta_state;
447 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
448 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
449 &state->alloc);
450
451 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
452 &state->alloc);
453
454 radv_DestroyPipeline(radv_device_to_handle(device),
455 state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);
456
457 radv_DestroyPipeline(radv_device_to_handle(device),
458 state->resolve_compute.depth[i].average_pipeline, &state->alloc);
459
460 radv_DestroyPipeline(radv_device_to_handle(device),
461 state->resolve_compute.depth[i].max_pipeline, &state->alloc);
462
463 radv_DestroyPipeline(radv_device_to_handle(device),
464 state->resolve_compute.depth[i].min_pipeline, &state->alloc);
465
466 radv_DestroyPipeline(radv_device_to_handle(device),
467 state->resolve_compute.stencil[i].max_pipeline, &state->alloc);
468
469 radv_DestroyPipeline(radv_device_to_handle(device),
470 state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
471 }
472
473 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
474 &state->alloc);
475
476 radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
477 &state->alloc);
478
479 device->vk.dispatch_table.DestroyDescriptorSetLayout(
480 radv_device_to_handle(device), state->resolve_compute.ds_layout, &state->alloc);
481 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
482 &state->alloc);
483 }
484
485 static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview)486 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
487 {
488 struct radv_device *device = cmd_buffer->device;
489 struct radv_meta_state *state = &device->meta_state;
490 uint32_t samples = src_iview->image->info.samples;
491 uint32_t samples_log2 = ffs(samples) - 1;
492 VkPipeline *pipeline;
493
494 if (vk_format_is_int(src_iview->vk.format))
495 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
496 else if (vk_format_is_srgb(src_iview->vk.format))
497 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
498 else
499 pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
500
501 if (!*pipeline) {
502 VkResult ret;
503
504 ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk.format),
505 vk_format_is_srgb(src_iview->vk.format), pipeline);
506 if (ret != VK_SUCCESS) {
507 cmd_buffer->record_result = ret;
508 return NULL;
509 }
510 }
511
512 return pipeline;
513 }
514
515 static void
emit_resolve(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src_iview,struct radv_image_view * dest_iview,const VkOffset2D * src_offset,const VkOffset2D * dest_offset,const VkExtent2D * resolve_extent)516 emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
517 struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
518 const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
519 {
520 struct radv_device *device = cmd_buffer->device;
521 VkPipeline *pipeline;
522
523 radv_meta_push_descriptor_set(
524 cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
525 0, /* set */
526 2, /* descriptorWriteCount */
527 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
528 .dstBinding = 0,
529 .dstArrayElement = 0,
530 .descriptorCount = 1,
531 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
532 .pImageInfo =
533 (VkDescriptorImageInfo[]){
534 {.sampler = VK_NULL_HANDLE,
535 .imageView = radv_image_view_to_handle(src_iview),
536 .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
537 }},
538 {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
539 .dstBinding = 1,
540 .dstArrayElement = 0,
541 .descriptorCount = 1,
542 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
543 .pImageInfo = (VkDescriptorImageInfo[]){
544 {
545 .sampler = VK_NULL_HANDLE,
546 .imageView = radv_image_view_to_handle(dest_iview),
547 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
548 },
549 }}});
550
551 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);
552
553 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
554 *pipeline);
555
556 unsigned push_constants[4] = {
557 src_offset->x,
558 src_offset->y,
559 dest_offset->x,
560 dest_offset->y,
561 };
562 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
563 device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
564 0, 16, push_constants);
565 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
566 }
567
/* Record one depth/stencil resolve dispatch: bind src/dst views via push
 * descriptors, pick the pipeline for (resolve_mode, aspect, sample count),
 * creating it on demand if needed, then dispatch one invocation per
 * (pixel, layer) of 'resolve_extent'. */
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   /* Same set layout as the color path: binding 0 = sampled src,
    * binding 1 = storage dst. */
   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   /* Select the pipeline slot for this (mode, aspect) pair; the SAMPLE_ZERO
    * pipelines are sample-count independent. */
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT:
      /* Averaging is depth-only; stencil cannot request it. */
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   /* Build the pipeline on demand if the device skipped eager creation. */
   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   /* Depth is the layer count: the shader resolves one layer per z slice. */
   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}
650
651 void
radv_meta_resolve_compute_image(struct radv_cmd_buffer * cmd_buffer,struct radv_image * src_image,VkFormat src_format,VkImageLayout src_image_layout,struct radv_image * dest_image,VkFormat dest_format,VkImageLayout dest_image_layout,const VkImageResolve2 * region)652 radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
653 VkFormat src_format, VkImageLayout src_image_layout,
654 struct radv_image *dest_image, VkFormat dest_format,
655 VkImageLayout dest_image_layout, const VkImageResolve2 *region)
656 {
657 struct radv_meta_saved_state saved_state;
658
659 radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
660
661 /* For partial resolves, DCC should be decompressed before resolving
662 * because the metadata is re-initialized to the uncompressed after.
663 */
664 uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->qf,
665 cmd_buffer->qf);
666
667 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
668 radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
669 dest_image_layout, false, queue_mask) &&
670 (region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
671 region->extent.width != dest_image->info.width ||
672 region->extent.height != dest_image->info.height ||
673 region->extent.depth != dest_image->info.depth)) {
674 radv_decompress_dcc(cmd_buffer, dest_image,
675 &(VkImageSubresourceRange){
676 .aspectMask = region->dstSubresource.aspectMask,
677 .baseMipLevel = region->dstSubresource.mipLevel,
678 .levelCount = 1,
679 .baseArrayLayer = region->dstSubresource.baseArrayLayer,
680 .layerCount = region->dstSubresource.layerCount,
681 });
682 }
683
684 radv_meta_save(
685 &saved_state, cmd_buffer,
686 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
687
688 assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
689 assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
690 assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
691
692 const uint32_t src_base_layer =
693 radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset);
694
695 const uint32_t dest_base_layer =
696 radv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, ®ion->dstOffset);
697
698 const struct VkExtent3D extent = vk_image_sanitize_extent(&src_image->vk, region->extent);
699 const struct VkOffset3D srcOffset = vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
700 const struct VkOffset3D dstOffset = vk_image_sanitize_offset(&dest_image->vk, region->dstOffset);
701
702 for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
703
704 struct radv_image_view src_iview;
705 radv_image_view_init(&src_iview, cmd_buffer->device,
706 &(VkImageViewCreateInfo){
707 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
708 .image = radv_image_to_handle(src_image),
709 .viewType = radv_meta_get_view_type(src_image),
710 .format = src_format,
711 .subresourceRange =
712 {
713 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
714 .baseMipLevel = region->srcSubresource.mipLevel,
715 .levelCount = 1,
716 .baseArrayLayer = src_base_layer + layer,
717 .layerCount = 1,
718 },
719 },
720 0, NULL);
721
722 struct radv_image_view dest_iview;
723 radv_image_view_init(&dest_iview, cmd_buffer->device,
724 &(VkImageViewCreateInfo){
725 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
726 .image = radv_image_to_handle(dest_image),
727 .viewType = radv_meta_get_view_type(dest_image),
728 .format = vk_to_non_srgb_format(dest_format),
729 .subresourceRange =
730 {
731 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
732 .baseMipLevel = region->dstSubresource.mipLevel,
733 .levelCount = 1,
734 .baseArrayLayer = dest_base_layer + layer,
735 .layerCount = 1,
736 },
737 },
738 0, NULL);
739
740 emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
741 &(VkOffset2D){dstOffset.x, dstOffset.y},
742 &(VkExtent2D){extent.width, extent.height});
743
744 radv_image_view_finish(&src_iview);
745 radv_image_view_finish(&dest_iview);
746 }
747
748 radv_meta_restore(&saved_state, cmd_buffer);
749
750 if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
751 radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
752 dest_image_layout, false, queue_mask)) {
753
754 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
755
756 VkImageSubresourceRange range = {
757 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
758 .baseMipLevel = region->dstSubresource.mipLevel,
759 .levelCount = 1,
760 .baseArrayLayer = dest_base_layer,
761 .layerCount = region->dstSubresource.layerCount,
762 };
763
764 cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
765 }
766 }
767
768 /**
769 * Emit any needed resolves for the current subpass.
770 */
771 void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer * cmd_buffer)772 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
773 {
774 struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
775 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
776 struct radv_subpass_barrier barrier;
777 uint32_t layer_count = fb->layers;
778
779 if (subpass->view_mask)
780 layer_count = util_last_bit(subpass->view_mask);
781
782 /* Resolves happen before the end-of-subpass barriers get executed, so
783 * we have to make the attachment shader-readable.
784 */
785 barrier.src_stage_mask = VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT;
786 barrier.src_access_mask = VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT;
787 barrier.dst_access_mask = VK_ACCESS_2_SHADER_READ_BIT | VK_ACCESS_2_SHADER_WRITE_BIT;
788 radv_emit_subpass_barrier(cmd_buffer, &barrier);
789
790 for (uint32_t i = 0; i < subpass->color_count; ++i) {
791 struct radv_subpass_attachment src_att = subpass->color_attachments[i];
792 struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
793
794 if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
795 continue;
796
797 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
798 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;
799
800 VkImageResolve2 region = {
801 .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2,
802 .extent = (VkExtent3D){fb->width, fb->height, 1},
803 .srcSubresource =
804 (VkImageSubresourceLayers){
805 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
806 .mipLevel = src_iview->vk.base_mip_level,
807 .baseArrayLayer = src_iview->vk.base_array_layer,
808 .layerCount = layer_count,
809 },
810 .dstSubresource =
811 (VkImageSubresourceLayers){
812 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
813 .mipLevel = dst_iview->vk.base_mip_level,
814 .baseArrayLayer = dst_iview->vk.base_array_layer,
815 .layerCount = layer_count,
816 },
817 .srcOffset = (VkOffset3D){0, 0, 0},
818 .dstOffset = (VkOffset3D){0, 0, 0},
819 };
820
821 radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk.format,
822 src_att.layout, dst_iview->image, dst_iview->vk.format,
823 dst_att.layout, ®ion);
824 }
825
826 cmd_buffer->state.flush_bits |=
827 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
828 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
829 }
830
831 void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,VkResolveModeFlagBits resolve_mode)832 radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
833 VkImageAspectFlags aspects,
834 VkResolveModeFlagBits resolve_mode)
835 {
836 struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
837 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
838 struct radv_meta_saved_state saved_state;
839 uint32_t layer_count = fb->layers;
840
841 if (subpass->view_mask)
842 layer_count = util_last_bit(subpass->view_mask);
843
844 /* Resolves happen before the end-of-subpass barriers get executed, so
845 * we have to make the attachment shader-readable.
846 */
847 cmd_buffer->state.flush_bits |=
848 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
849 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, NULL) |
850 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
851
852 struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
853 struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
854 struct radv_image *src_image = src_iview->image;
855
856 VkImageResolve2 region = {0};
857 region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2;
858 region.srcSubresource.aspectMask = aspects;
859 region.srcSubresource.mipLevel = 0;
860 region.srcSubresource.baseArrayLayer = src_iview->vk.base_array_layer;
861 region.srcSubresource.layerCount = layer_count;
862
863 radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, ®ion);
864
865 radv_meta_save(&saved_state, cmd_buffer,
866 RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);
867
868 struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
869 struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
870 struct radv_image *dst_image = dst_iview->image;
871
872 struct radv_image_view tsrc_iview;
873 radv_image_view_init(&tsrc_iview, cmd_buffer->device,
874 &(VkImageViewCreateInfo){
875 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
876 .image = radv_image_to_handle(src_image),
877 .viewType = radv_meta_get_view_type(src_image),
878 .format = src_iview->vk.format,
879 .subresourceRange =
880 {
881 .aspectMask = aspects,
882 .baseMipLevel = src_iview->vk.base_mip_level,
883 .levelCount = 1,
884 .baseArrayLayer = src_iview->vk.base_array_layer,
885 .layerCount = layer_count,
886 },
887 },
888 0, NULL);
889
890 struct radv_image_view tdst_iview;
891 radv_image_view_init(&tdst_iview, cmd_buffer->device,
892 &(VkImageViewCreateInfo){
893 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
894 .image = radv_image_to_handle(dst_image),
895 .viewType = radv_meta_get_view_type(dst_image),
896 .format = dst_iview->vk.format,
897 .subresourceRange =
898 {
899 .aspectMask = aspects,
900 .baseMipLevel = dst_iview->vk.base_mip_level,
901 .levelCount = 1,
902 .baseArrayLayer = dst_iview->vk.base_array_layer,
903 .layerCount = layer_count,
904 },
905 },
906 0, NULL);
907
908 emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
909 &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
910 resolve_mode);
911
912 cmd_buffer->state.flush_bits |=
913 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
914 radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, NULL);
915
916 VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
917 uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->qf,
918 cmd_buffer->qf);
919
920 if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
921 VkImageSubresourceRange range = {0};
922 range.aspectMask = aspects;
923 range.baseMipLevel = dst_iview->vk.base_mip_level;
924 range.levelCount = 1;
925 range.baseArrayLayer = dst_iview->vk.base_array_layer;
926 range.layerCount = layer_count;
927
928 uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);
929
930 cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
931 }
932
933 radv_image_view_finish(&tsrc_iview);
934 radv_image_view_finish(&tdst_iview);
935
936 radv_meta_restore(&saved_state, cmd_buffer);
937 }
938