• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 #include "nir/nir_builder.h"
24 #include "radv_meta.h"
25 
26 static nir_shader *
build_fmask_copy_compute_shader(struct radv_device * dev,int samples)27 build_fmask_copy_compute_shader(struct radv_device *dev, int samples)
28 {
29    const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
30    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_MS, false, GLSL_TYPE_FLOAT);
31 
32    nir_builder b =
33       radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_fmask_copy_cs_-%d", samples);
34 
35    b.shader->info.workgroup_size[0] = 8;
36    b.shader->info.workgroup_size[1] = 8;
37 
38    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
39    input_img->data.descriptor_set = 0;
40    input_img->data.binding = 0;
41 
42    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
43    output_img->data.descriptor_set = 0;
44    output_img->data.binding = 1;
45 
46    nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
47    nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
48    nir_ssa_def *block_size =
49       nir_imm_ivec3(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
50                     b.shader->info.workgroup_size[2]);
51 
52    nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
53 
54    /* Get coordinates. */
55    nir_ssa_def *src_coord = nir_channels(&b, global_id, 0x3);
56    nir_ssa_def *dst_coord = nir_vec4(&b, nir_channel(&b, src_coord, 0),
57                                          nir_channel(&b, src_coord, 1),
58                                          nir_ssa_undef(&b, 1, 32),
59                                          nir_ssa_undef(&b, 1, 32));
60 
61    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
62 
63    /* Fetch the mask for this fragment. */
64    nir_tex_instr *frag_mask_fetch = nir_tex_instr_create(b.shader, 3);
65    frag_mask_fetch->sampler_dim = GLSL_SAMPLER_DIM_MS;
66    frag_mask_fetch->op = nir_texop_fragment_mask_fetch_amd;
67    frag_mask_fetch->src[0].src_type = nir_tex_src_coord;
68    frag_mask_fetch->src[0].src = nir_src_for_ssa(src_coord);
69    frag_mask_fetch->src[1].src_type = nir_tex_src_lod;
70    frag_mask_fetch->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
71    frag_mask_fetch->src[2].src_type = nir_tex_src_texture_deref;
72    frag_mask_fetch->src[2].src = nir_src_for_ssa(input_img_deref);
73    frag_mask_fetch->dest_type = nir_type_uint32;
74    frag_mask_fetch->is_array = false;
75    frag_mask_fetch->coord_components = 2;
76 
77    nir_ssa_dest_init(&frag_mask_fetch->instr, &frag_mask_fetch->dest, 1, 32, "frag_mask_fetch");
78    nir_builder_instr_insert(&b, &frag_mask_fetch->instr);
79 
80    nir_ssa_def *frag_mask = &frag_mask_fetch->dest.ssa;
81 
82    /* Get the maximum sample used in this fragment. */
83    nir_ssa_def *max_sample_index = nir_imm_int(&b, 0);
84    for (uint32_t s = 0; s < samples; s++) {
85       /* max_sample_index = MAX2(max_sample_index, (frag_mask >> (s * 4)) & 0xf) */
86       max_sample_index = nir_umax(&b, max_sample_index,
87                               nir_ubitfield_extract(&b, frag_mask, nir_imm_int(&b, 4 * s),
88                                                     nir_imm_int(&b, 4)));
89    }
90 
91    nir_variable *counter = nir_local_variable_create(b.impl, glsl_int_type(), "counter");
92    nir_store_var(&b, counter, nir_imm_int(&b, 0), 0x1);
93 
94    nir_loop *loop = nir_push_loop(&b);
95    {
96       nir_ssa_def *sample_id = nir_load_var(&b, counter);
97 
98       nir_tex_instr *frag_fetch = nir_tex_instr_create(b.shader, 4);
99       frag_fetch->sampler_dim = GLSL_SAMPLER_DIM_MS;
100       frag_fetch->op = nir_texop_fragment_fetch_amd;
101       frag_fetch->src[0].src_type = nir_tex_src_coord;
102       frag_fetch->src[0].src = nir_src_for_ssa(src_coord);
103       frag_fetch->src[1].src_type = nir_tex_src_lod;
104       frag_fetch->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
105       frag_fetch->src[2].src_type = nir_tex_src_texture_deref;
106       frag_fetch->src[2].src = nir_src_for_ssa(input_img_deref);
107       frag_fetch->src[3].src_type = nir_tex_src_ms_index;
108       frag_fetch->src[3].src = nir_src_for_ssa(sample_id);
109       frag_fetch->dest_type = nir_type_float32;
110       frag_fetch->is_array = false;
111       frag_fetch->coord_components = 2;
112 
113       nir_ssa_dest_init(&frag_fetch->instr, &frag_fetch->dest, 4, 32, "frag_fetch");
114       nir_builder_instr_insert(&b, &frag_fetch->instr);
115 
116       nir_ssa_def *outval = &frag_fetch->dest.ssa;
117       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
118                             sample_id, outval, nir_imm_int(&b, 0),
119                             .image_dim = GLSL_SAMPLER_DIM_MS);
120 
121       radv_break_on_count(&b, counter, max_sample_index);
122    }
123    nir_pop_loop(&b, loop);
124 
125    return b.shader;
126 }
127 
128 void
radv_device_finish_meta_fmask_copy_state(struct radv_device * device)129 radv_device_finish_meta_fmask_copy_state(struct radv_device *device)
130 {
131    struct radv_meta_state *state = &device->meta_state;
132 
133    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fmask_copy.p_layout,
134                               &state->alloc);
135    device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
136                                                         state->fmask_copy.ds_layout, &state->alloc);
137 
138    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
139       radv_DestroyPipeline(radv_device_to_handle(device), state->fmask_copy.pipeline[i], &state->alloc);
140    }
141 }
142 
143 static VkResult
create_fmask_copy_pipeline(struct radv_device * device,int samples,VkPipeline * pipeline)144 create_fmask_copy_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
145 {
146    struct radv_meta_state *state = &device->meta_state;
147    nir_shader *cs = build_fmask_copy_compute_shader(device, samples);
148    VkResult result;
149 
150    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
151       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
152       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
153       .module = vk_shader_module_handle_from_nir(cs),
154       .pName = "main",
155       .pSpecializationInfo = NULL,
156    };
157 
158    VkComputePipelineCreateInfo vk_pipeline_info = {
159       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
160       .stage = pipeline_shader_stage,
161       .flags = 0,
162       .layout = state->fmask_copy.p_layout,
163    };
164 
165    result = radv_CreateComputePipelines(radv_device_to_handle(device),
166                                         radv_pipeline_cache_to_handle(&state->cache), 1,
167                                         &vk_pipeline_info, NULL, pipeline);
168    ralloc_free(cs);
169    return result;
170 }
171 
172 VkResult
radv_device_init_meta_fmask_copy_state(struct radv_device * device)173 radv_device_init_meta_fmask_copy_state(struct radv_device *device)
174 {
175    VkResult result;
176 
177    VkDescriptorSetLayoutCreateInfo ds_create_info = {
178       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
179       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
180       .bindingCount = 2,
181       .pBindings = (VkDescriptorSetLayoutBinding[]){
182          {.binding = 0,
183           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
184           .descriptorCount = 1,
185           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
186           .pImmutableSamplers = NULL},
187          {.binding = 1,
188           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
189           .descriptorCount = 1,
190           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
191           .pImmutableSamplers = NULL},
192       }};
193 
194    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
195                                            &device->meta_state.alloc,
196                                            &device->meta_state.fmask_copy.ds_layout);
197    if (result != VK_SUCCESS)
198       return result;
199 
200    VkPipelineLayoutCreateInfo pl_create_info = {
201       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
202       .setLayoutCount = 1,
203       .pSetLayouts = &device->meta_state.fmask_copy.ds_layout,
204       .pushConstantRangeCount = 0,
205       .pPushConstantRanges = NULL
206    };
207 
208    result =
209       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
210                                 &device->meta_state.alloc, &device->meta_state.fmask_copy.p_layout);
211    if (result != VK_SUCCESS)
212       return result;
213 
214    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
215       uint32_t samples = 1 << i;
216       result = create_fmask_copy_pipeline(device, samples, &device->meta_state.fmask_copy.pipeline[i]);
217       if (result != VK_SUCCESS)
218          return result;
219    }
220 
221    return VK_SUCCESS;
222 }
223 
224 static void
radv_fixup_copy_dst_metadata(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image)225 radv_fixup_copy_dst_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,
226                              const struct radv_image *dst_image)
227 {
228    uint64_t src_offset, dst_offset, size;
229 
230    assert(src_image->planes[0].surface.cmask_size == dst_image->planes[0].surface.cmask_size &&
231           src_image->planes[0].surface.fmask_size == dst_image->planes[0].surface.fmask_size);
232    assert(src_image->planes[0].surface.fmask_offset + src_image->planes[0].surface.fmask_size ==
233           src_image->planes[0].surface.cmask_offset &&
234           dst_image->planes[0].surface.fmask_offset + dst_image->planes[0].surface.fmask_size ==
235           dst_image->planes[0].surface.cmask_offset);
236 
237    /* Copy CMASK+FMASK. */
238    size = src_image->planes[0].surface.cmask_size + src_image->planes[0].surface.fmask_size;
239    src_offset = src_image->bindings[0].offset + src_image->planes[0].surface.fmask_offset;
240    dst_offset = dst_image->bindings[0].offset + dst_image->planes[0].surface.fmask_offset;
241 
242    radv_copy_buffer(cmd_buffer, src_image->bindings[0].bo, dst_image->bindings[0].bo,
243                     src_offset, dst_offset, size);
244 }
245 
246 bool
radv_can_use_fmask_copy(struct radv_cmd_buffer * cmd_buffer,const struct radv_image * src_image,const struct radv_image * dst_image,unsigned num_rects,const struct radv_meta_blit2d_rect * rects)247 radv_can_use_fmask_copy(struct radv_cmd_buffer *cmd_buffer,
248                         const struct radv_image *src_image, const struct radv_image *dst_image,
249                         unsigned num_rects, const struct radv_meta_blit2d_rect *rects)
250 {
251    /* TODO: Test on pre GFX10 chips. */
252    if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX10)
253       return false;
254 
255    /* TODO: Add support for layers. */
256    if (src_image->info.array_size != 1 || dst_image->info.array_size != 1)
257       return false;
258 
259    /* Source/destination images must have FMASK. */
260    if (!radv_image_has_fmask(src_image) || !radv_image_has_fmask(dst_image))
261       return false;
262 
263    /* Source/destination images must have identical TC-compat mode. */
264    if (radv_image_is_tc_compat_cmask(src_image) != radv_image_is_tc_compat_cmask(dst_image))
265       return false;
266 
267    /* The region must be a whole image copy. */
268    if (num_rects != 1 ||
269        (rects[0].src_x || rects[0].src_y || rects[0].dst_x || rects[0].dst_y ||
270         rects[0].width != src_image->info.width || rects[0].height != src_image->info.height))
271       return false;
272 
273    /* Source/destination images must have identical size. */
274    if (src_image->info.width != dst_image->info.width ||
275        src_image->info.height != dst_image->info.height)
276       return false;
277 
278    /* Source/destination images must have identical swizzle. */
279    if (src_image->planes[0].surface.fmask_tile_swizzle !=
280        dst_image->planes[0].surface.fmask_tile_swizzle ||
281        src_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode !=
282        dst_image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode)
283       return false;
284 
285    return true;
286 }
287 
288 void
radv_fmask_copy(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst)289 radv_fmask_copy(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
290                 struct radv_meta_blit2d_surf *dst)
291 {
292    struct radv_device *device = cmd_buffer->device;
293    struct radv_image_view src_iview, dst_iview;
294    uint32_t samples = src->image->info.samples;
295    uint32_t samples_log2 = ffs(samples) - 1;
296 
297    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
298                         cmd_buffer->device->meta_state.fmask_copy.pipeline[samples_log2]);
299 
300    radv_image_view_init(&src_iview, device,
301                         &(VkImageViewCreateInfo){
302                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
303                            .image = radv_image_to_handle(src->image),
304                            .viewType = radv_meta_get_view_type(src->image),
305                            .format = vk_format_no_srgb(src->image->vk.format),
306                            .subresourceRange =
307                               {
308                                  .aspectMask = src->aspect_mask,
309                                  .baseMipLevel = 0,
310                                  .levelCount = 1,
311                                  .baseArrayLayer = 0,
312                                  .layerCount = 1,
313                               },
314                         },
315                         0, NULL);
316 
317    radv_image_view_init(&dst_iview, device,
318                         &(VkImageViewCreateInfo){
319                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
320                            .image = radv_image_to_handle(dst->image),
321                            .viewType = radv_meta_get_view_type(dst->image),
322                            .format = vk_format_no_srgb(dst->image->vk.format),
323                            .subresourceRange =
324                               {
325                                  .aspectMask = dst->aspect_mask,
326                                  .baseMipLevel = 0,
327                                  .levelCount = 1,
328                                  .baseArrayLayer = 0,
329                                  .layerCount = 1,
330                               },
331                         },
332                         0, NULL);
333 
334    radv_meta_push_descriptor_set(
335       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
336       cmd_buffer->device->meta_state.fmask_copy.p_layout, 0, /* set */
337       2,                                                     /* descriptorWriteCount */
338       (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
339                                 .dstBinding = 0,
340                                 .dstArrayElement = 0,
341                                 .descriptorCount = 1,
342                                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
343                                 .pImageInfo =
344                                    (VkDescriptorImageInfo[]){
345                                       {.sampler = VK_NULL_HANDLE,
346                                        .imageView = radv_image_view_to_handle(&src_iview),
347                                        .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
348                                    }},
349                                {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
350                                 .dstBinding = 1,
351                                 .dstArrayElement = 0,
352                                 .descriptorCount = 1,
353                                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
354                                 .pImageInfo = (VkDescriptorImageInfo[]){
355                                    {.sampler = VK_NULL_HANDLE,
356                                     .imageView = radv_image_view_to_handle(&dst_iview),
357                                     .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
358                                 }}});
359 
360    radv_unaligned_dispatch(cmd_buffer, src->image->info.width, src->image->info.height, 1);
361 
362    /* Fixup destination image metadata by copying CMASK/FMASK from the source image. */
363    radv_fixup_copy_dst_metadata(cmd_buffer, src->image, dst->image);
364 }
365