• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 #include "nir/nir_builder.h"
7 #include "radv_formats.h"
8 #include "radv_meta.h"
9 
10 static nir_shader *
build_fmask_copy_compute_shader(struct radv_device * dev,int samples)11 build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
12 {
13    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
14    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
15 
16    nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
17 
18    b.shader->info.workgroup_size[0] = 8;
19    b.shader->info.workgroup_size[1] = 8;
20 
21    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
22    input_img->data.descriptor_set = 0;
23    input_img->data.binding = 0;
24 
25    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
26    output_img->data.descriptor_set = 0;
27    output_img->data.binding = 1;
28 
29    nir_def *invoc_id = nir_load_local_invocation_id(&b);
30    nir_def *wg_id = nir_load_workgroup_id(&b);
31    nir_def *block_size = nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
32                                        b.shader->info.workgroup_size[2]);
33 
34    nir_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
35 
36    /* Get coordinates. */
37    nir_def *src_coord = nir_trim_vector(&b, global_id, 2);
38    nir_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0), nir_channel(&b, src_coord, 1), nir_undef(&b, 1, 32),
39                                  nir_undef(&b, 1, 32));
40 
41    nir_tex_src frag_mask_srcs[] = {{
42       .src_type = nir_tex_src_coord,
43       .src = nir_src_for_ssa(src_coord),
44    }};
45    nir_def *frag_mask =
46       nir_build_tex_deref_instr(&b, nir_texop_fragment_mask_fetch_amd, nir_build_deref_var(&b, input_img), NULL,
47                                 ARRAY_SIZE(frag_mask_srcs), frag_mask_srcs);
48 
49    /* Get the maximum sample used in this fragment. */
50    nir_def *max_sample_index = nir_imm_int(&b, 0);
51    for (uint32_t s = 0; s < samples; s++) {
52       /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
53       max_sample_index = nir_umax(&b, max_sample_index,
54                                   nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s), nir_imm_int(&b, 4)));
55    }
56 
57    nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
58    nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
59 
60    nir_loop *loop = nir_push_loop(&b);
61    {
62       nir_def *sample_id = nir_load_var(&b, counter);
63 
64       nir_tex_src frag_fetch_srcs[] = {{
65                                           .src_type = nir_tex_src_coord,
66                                           .src = nir_src_for_ssa(src_coord),
67                                        },
68                                        {
69                                           .src_type = nir_tex_src_ms_index,
70                                           .src = nir_src_for_ssa(sample_id),
71                                        }};
72       nir_def *outval = nir_build_tex_deref_instr(&b, nir_texop_fragment_fetch_amd, nir_build_deref_var(&b, input_img),
73                                                   NULL, ARRAY_SIZE(frag_fetch_srcs), frag_fetch_srcs);
74 
75       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, sample_id, outval,
76                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_MS);
77 
78       radv_break_on_count(&b, counter, max_sample_index);
79    }
80    nir_pop_loop(&b, loop);
81 
82    return b.shader;
83 }
84 
85 static VkResult
get_pipeline_layout(struct radv_device * device,VkPipelineLayout * layout_out)86 get_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout_out)
87 {
88    enum radv_meta_object_key_type key = RADV_META_OBJECT_KEY_FMASK_COPY;
89 
90    const VkDescriptorSetLayoutBinding bindings[] = {
91       {
92          .binding = 0,
93          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
94          .descriptorCount = 1,
95          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
96       },
97       {
98          .binding = 1,
99          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
100          .descriptorCount = 1,
101          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
102       },
103    };
104 
105    const VkDescriptorSetLayoutCreateInfo desc_info = {
106       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
107       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT,
108       .bindingCount = 2,
109       .pBindings = bindings,
110    };
111 
112    return vk_meta_get_pipeline_layout(&device->vk, &device->meta_state.device, &desc_info, NULL, &key, sizeof(key),
113                                       layout_out);
114 }
115 
116 struct radv_fmask_copy_key {
117    enum radv_meta_object_key_type type;
118    uint32_t samples;
119 };
120 
121 static VkResult
get_pipeline(struct radv_device * device,uint32_t samples_log2,VkPipeline * pipeline_out,VkPipelineLayout * layout_out)122 get_pipeline(struct radv_device *device, uint32_t samples_log2, VkPipeline *pipeline_out, VkPipelineLayout *layout_out)
123 {
124    const uint32_t samples = 1 << samples_log2;
125    struct radv_fmask_copy_key key;
126    VkResult result;
127 
128    result = get_pipeline_layout(device, layout_out);
129    if (result != VK_SUCCESS)
130       return result;
131 
132    memset(&key, 0, sizeof(key));
133    key.type = RADV_META_OBJECT_KEY_FMASK_COPY;
134    key.samples = samples;
135 
136    VkPipeline pipeline_from_cache = vk_meta_lookup_pipeline(&device->meta_state.device, &key, sizeof(key));
137    if (pipeline_from_cache != VK_NULL_HANDLE) {
138       *pipeline_out = pipeline_from_cache;
139       return VK_SUCCESS;
140    }
141 
142    nir_shader *cs = build_fmask_copy_compute_shader(device, samples);
143 
144    const VkPipelineShaderStageCreateInfo stage_info = {
145       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
146       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
147       .module = vk_shader_module_handle_from_nir(cs),
148       .pName = "main",
149       .pSpecializationInfo = NULL,
150    };
151 
152    const VkComputePipelineCreateInfo pipeline_info = {
153       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
154       .stage = stage_info,
155       .flags = 0,
156       .layout = *layout_out,
157    };
158 
159    result = vk_meta_create_compute_pipeline(&device->vk, &device->meta_state.device, &pipeline_info, &key, sizeof(key),
160                                             pipeline_out);
161 
162    ralloc_free(cs);
163    return result;
164 }
165 
166 static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image)167 radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
168                              const struct radv_image *dst_image)
169 {
170    uint64_t src_offset, dst_offset, size;
171 
172    assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
173           src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
174    assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
175              src_image->planes[0].surface.cmask_offset &&
176           dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
177              dst_image->planes[0].surface.cmask_offset);
178 
179    /* Copy CMASK+FMASK. */
180    size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
181    src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
182    dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;
183 
184    radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo, src_offset, dst_offset, size);
185 }
186 
187 bool
radv_can_use_fmask_copy(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image,const struct radv_meta_blit2d_rect * rect)188 radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
189                         const struct radv_image *dst_image, const struct radv_meta_blit2d_rect *rect)
190 {
191    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
192    const struct radv_physical_device *pdev = radv_device_physical(device);
193 
194    /* TODO: Test on pre GFX10 chips. */
195    if (pdev->info.gfx_level < GFX10)
196       return false;
197 
198    /* TODO: Add support for layers. */
199    if (src_image->vk.array_layers != 1 || dst_image->vk.array_layers != 1)
200       return false;
201 
202    /* Source/destination images must have FMASK. */
203    if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
204       return false;
205 
206    /* Source/destination images must have identical TC-compat mode. */
207    if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
208       return false;
209 
210    /* The region must be a whole image copy. */
211    if (rect->src_x || rect->src_y || rect->dst_x || rect->dst_y || rect->width != src_image->vk.extent.width ||
212        rect->height != src_image->vk.extent.height)
213       return false;
214 
215    /* Source/destination images must have identical size. */
216    if (src_image->vk.extent.width != dst_image->vk.extent.width ||
217        src_image->vk.extent.height != dst_image->vk.extent.height)
218       return false;
219 
220    /* Source/destination images must have identical swizzle. */
221    if (src_image->planes[0].surface.fmask_tile_swizzle != dst_image->planes[0].surface.fmask_tile_swizzle ||
222        src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
223           dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
224       return false;
225 
226    return true;
227 }
228 
229 void
radv_fmask_copy(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst)230 radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
231                 struct radv_meta_blit2d_surf *dst)
232 {
233    struct radv_device *device = radv_cmd_buffer_device(cmd_buffer);
234    struct radv_image_view src_iview, dst_iview;
235    uint32_t samples = src->image->vk.samples;
236    uint32_t samples_log2 = ffs(samples) - 1;
237    VkPipelineLayout layout;
238    VkPipeline pipeline;
239    VkResult result;
240 
241    result = get_pipeline(device, samples_log2, &pipeline, &layout);
242    if (result != VK_SUCCESS) {
243       vk_command_buffer_set_error(&cmd_buffer->vk, result);
244       return;
245    }
246 
247    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
248 
249    radv_image_view_init(&src_iview, device,
250                         &(VkImageViewCreateInfo){
251                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
252                            .image = radv_image_to_handle(src->image),
253                            .viewType = radv_meta_get_view_type(src->image),
254                            .format = vk_format_no_srgb(src->image->vk.format),
255                            .subresourceRange =
256                               {
257                                  .aspectMask = src->aspect_mask,
258                                  .baseMipLevel = 0,
259                                  .levelCount = 1,
260                                  .baseArrayLayer = 0,
261                                  .layerCount = 1,
262                               },
263                         },
264                         NULL);
265 
266    radv_image_view_init(&dst_iview, device,
267                         &(VkImageViewCreateInfo){
268                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
269                            .image = radv_image_to_handle(dst->image),
270                            .viewType = radv_meta_get_view_type(dst->image),
271                            .format = vk_format_no_srgb(dst->image->vk.format),
272                            .subresourceRange =
273                               {
274                                  .aspectMask = dst->aspect_mask,
275                                  .baseMipLevel = 0,
276                                  .levelCount = 1,
277                                  .baseArrayLayer = 0,
278                                  .layerCount = 1,
279                               },
280                         },
281                         NULL);
282 
283    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, layout, 0, 2,
284                                  (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
285                                                            .dstBinding = 0,
286                                                            .dstArrayElement = 0,
287                                                            .descriptorCount = 1,
288                                                            .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
289                                                            .pImageInfo =
290                                                               (VkDescriptorImageInfo[]){
291                                                                  {.sampler = VK_NULL_HANDLE,
292                                                                   .imageView = radv_image_view_to_handle(&src_iview),
293                                                                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
294                                                               }},
295                                                           {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
296                                                            .dstBinding = 1,
297                                                            .dstArrayElement = 0,
298                                                            .descriptorCount = 1,
299                                                            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
300                                                            .pImageInfo = (VkDescriptorImageInfo[]){
301                                                               {.sampler = VK_NULL_HANDLE,
302                                                                .imageView = radv_image_view_to_handle(&dst_iview),
303                                                                .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
304                                                            }}});
305 
306    radv_unaligned_dispatch(cmd_buffer, src->image->vk.extent.width, src->image->vk.extent.height, 1);
307 
308    /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
309    radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
310 }
311