/*
 * Copyright © 2021 Valve Corporation
 *
 * SPDX-License-Identifier: MIT
 */
#include "nir/nir_builder.h"
#include "radv_formats.h"
#include "radv_meta.h"

static nir_shader *
build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
{
   const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);

   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);

   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

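   /* Compute the global invocation ID, which maps 1:1 to a pixel of the source image. */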
   nir_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_def *wg_id = nir_load_workgroup_id(&b);
   nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                                       b.shader->info.workgroup_size[2]);

   nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   /* Get coordinates. */
   nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
   nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
                                 nir_undef(&b, 1, 32));

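   /* Fetch the FMASK value for this pixel; it packs one 4-bit value per sample. */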
   nir_tex_src frag_mask_srcs[] = {{
      .src_type = nir_tex_src_coord,
      .src = nir_src_for_ssa(src_coord),
   }};
   nir_def *frag_mask =
      nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
                                ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);

   /* Get the maximum sample used in this fragment. */
   nir_def *max_sample_index = nir_imm_int(&b, 0);
   for (uint32_t s = 0; s < samples; s++) {
      /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
      max_sample_index = nir_umax(&b, max_sample_index,
                                  nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
   }

   nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
   nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);

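   /* Copy the color values for all samples up to the maximum one used by this pixel; the FMASK/CMASK metadata
    * itself is copied verbatim afterwards by radv_fixup_copy_dst_metadata().
    */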
   nir_loop *loop = nir_push_loop(&b);
   {
      nir_def *sample_id = nir_load_var(&b, counter);

      nir_tex_src frag_fetch_srcs[] = {{
                                          .src_type = nir_tex_src_coord,
                                          .src = nir_src_for_ssa(src_coord),
                                       },
                                       {
                                          .src_type = nir_tex_src_ms_index,
                                          .src = nir_src_for_ssa(sample_id),
                                       }};
      nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
                                                  NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval,
                            nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);

      radv_break_on_count(&b, counter, max_sample_index);
   }
   nir_pop_loop(&b, loop);

   return b.shader;
}

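/* The copy shader uses a single push-descriptor set: binding 0 is the source as a sampled multisample image and
 * binding 1 is the destination as a storage image.
 */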
static VkResult
get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
{
   enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_FMASK_COPY;

   const VkDescriptorSetLayoutBinding bindings[] = {
      {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
      {
         .binding = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
      },
   };

   const VkDescriptorSetLayoutCreateInfo desc_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT,
      .bindingCount = 2,
      .pBindings = bindings,
   };

   return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
                                      layout_out);
}

struct radv_fmask_copy_key {
   enum radv_meta_object_key_type type;
   uint32_t samples;
};

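/* Return the FMASK copy pipeline for the given sample count, creating and caching it on first use. */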
static VkResult
get_pipeline(struct radv_device *device, uint32_t samples_log2, VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
{
   const uint32_t samples = 1 << samples_log2;
   struct radv_fmask_copy_key key;
   VkResult result;

   result = get_pipeline_layout(device, layout_out);
   if (result != VK_SUCCESS)
      return result;

   memset(&key, 0, sizeof(key));
   key.type = RADV_META_OBJECT_KEY_FMASK_COPY;
   key.samples = samples;

   VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
   if (pipeline_from_cache != VK_NULL_HANDLE) {
      *pipeline_out = pipeline_from_cache;
      return VK_SUCCESS;
   }

   nir_shader *cs = build_fmask_copy_compute_shader(device, samples);

   const VkPipelineShaderStageCreateInfo stage_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   const VkComputePipelineCreateInfo pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = stage_info,
      .flags = 0,
      .layout = *layout_out,
   };

   result = vk_meta_create_compute_pipeline(&device->vk, &device->meta_state.device, &pipeline_info, &key, sizeof(key),
                                            pipeline_out);

   ralloc_free(cs);
   return result;
}

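/* Fix up the destination image metadata by copying the FMASK/CMASK buffers of the source image verbatim. This is
 * only valid because radv_can_use_fmask_copy() requires identical image sizes and FMASK swizzle modes.
 */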
static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
                             const struct radv_image *dst_image)
{
   uint64_t src_offset, dst_offset, size;

   assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
          src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
   assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
             src_image->planes[0].surface.cmask_offset &&
          dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
             dst_image->planes[0].surface.cmask_offset);

   /* Copy CMASK+FMASK. */
   size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
   src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
   dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;

   radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size);
}

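/* FMASK-based copies are restricted to whole-image, single-layer copies between images with matching FMASK
 * layouts (see the checks below).
 */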
bool
radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
                        const struct radv_image *dst_image, const struct radv_meta_blit2d_rect *rect)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   const struct radv_physical_device *pdev = radv_device_physical(device);

   /* TODO: Test on pre GFX10 chips. */
   if (pdev->info.gfx_level < GFX10)
      return false;

   /* TODO: Add support for layers. */
   if (src_image->vk.array_layers != 1 || dst_image->vk.array_layers != 1)
      return false;

   /* Source/destination images must have FMASK. */
   if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
      return false;

   /* Source/destination images must have identical TC-compat mode. */
   if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
      return false;

   /* The region must be a whole image copy. */
   if (rect->src_x || rect->src_y || rect->dst_x || rect->dst_y || rect->width != src_image->vk.extent.width ||
       rect->height != src_image->vk.extent.height)
      return false;

   /* Source/destination images must have identical size. */
   if (src_image->vk.extent.width != dst_image->vk.extent.width ||
       src_image->vk.extent.height != dst_image->vk.extent.height)
      return false;

   /* Source/destination images must have identical swizzle. */
   if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle ||
       src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
          dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
      return false;

   return true;
}

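/* Copy a multisample image: a compute shader copies the color samples, then the FMASK/CMASK metadata is copied
 * verbatim from the source to the destination.
 */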
void
radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                struct radv_meta_blit2d_surf *dst)
{
   struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
   struct radv_image_view src_iview, dst_iview;
   uint32_t samples = src->image->vk.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipelineLayout layout;
   VkPipeline pipeline;
   VkResult result;

   result = get_pipeline(device, samples_log2, &pipeline, &layout);
   if (result != VK_SUCCESS) {
      vk_command_buffer_set_error(&cmd_buffer->vk, result);
      return;
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   radv_image_view_init(&src_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src->image),
                           .viewType = radv_meta_get_view_type(src->image),
                           .format = vk_format_no_srgb(src->image->vk.format),
                           .subresourceRange =
                              {
                                 .aspectMask = src->aspect_mask,
                                 .baseMipLevel = 0,
                                 .levelCount = 1,
                                 .baseArrayLayer = 0,
                                 .layerCount = 1,
                              },
                        },
                        NULL);

   radv_image_view_init(&dst_iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst->image),
                           .viewType = radv_meta_get_view_type(dst->image),
                           .format = vk_format_no_srgb(dst->image->vk.format),
                           .subresourceRange =
                              {
                                 .aspectMask = dst->aspect_mask,
                                 .baseMipLevel = 0,
                                 .levelCount = 1,
                                 .baseArrayLayer = 0,
                                 .layerCount = 1,
                              },
                        },
                        NULL);

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 2,
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {.sampler = VK_NULL_HANDLE,
                                                                  .imageView = radv_image_view_to_handle(&src_iview),
                                                                  .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {.sampler = VK_NULL_HANDLE,
                                                               .imageView = radv_image_view_to_handle(&dst_iview),
                                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                                           }}});

   radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1);

   /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
   radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
}