1 /*
2 * Copyright © 2021 Google
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #define AC_SURFACE_INCLUDE_NIR
25 #include "ac_surface.h"
26
27 #include "radv_meta.h"
28 #include "radv_private.h"
29 #include "vk_common_entrypoints.h"
30
31 static nir_shader *
build_dcc_retile_compute_shader(struct radv_device * dev,struct radeon_surf * surf)32 build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)
33 {
34 enum glsl_sampler_dim dim = GLSL_SAMPLER_DIM_BUF;
35 const struct glsl_type *buf_type = glsl_image_type(dim, false, GLSL_TYPE_UINT);
36 nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "dcc_retile_compute");
37
38 b.shader->info.workgroup_size[0] = 8;
39 b.shader->info.workgroup_size[1] = 8;
40
41 nir_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
42 nir_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);
43 nir_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);
44
45 nir_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);
46 nir_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);
47 nir_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);
48 nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");
49 input_dcc->data.descriptor_set = 0;
50 input_dcc->data.binding = 0;
51 nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");
52 output_dcc->data.descriptor_set = 0;
53 output_dcc->data.binding = 1;
54
55 nir_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->def;
56 nir_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->def;
57
58 nir_def *coord = get_global_ids(&b, 2);
59 nir_def *zero = nir_imm_int(&b, 0);
60 coord =
61 nir_imul(&b, coord, nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));
62
63 nir_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,
64 &surf->u.gfx9.color.dcc_equation, src_dcc_pitch, src_dcc_height, zero,
65 nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
66 nir_def *dst = ac_nir_dcc_addr_from_coord(
67 &b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation, dst_dcc_pitch,
68 dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1), zero, zero, zero);
69
70 nir_def *dcc_val = nir_image_deref_load(&b, 1, 32, input_dcc_ref, nir_vec4(&b, src, src, src, src),
71 nir_undef(&b, 1, 32), nir_imm_int(&b, 0), .image_dim = dim);
72
73 nir_image_deref_store(&b, output_dcc_ref, nir_vec4(&b, dst, dst, dst, dst), nir_undef(&b, 1, 32), dcc_val,
74 nir_imm_int(&b, 0), .image_dim = dim);
75
76 return b.shader;
77 }
78
79 void
radv_device_finish_meta_dcc_retile_state(struct radv_device * device)80 radv_device_finish_meta_dcc_retile_state(struct radv_device *device)
81 {
82 struct radv_meta_state *state = &device->meta_state;
83
84 for (unsigned i = 0; i < ARRAY_SIZE(state->dcc_retile.pipeline); i++) {
85 radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline[i], &state->alloc);
86 }
87 radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout, &state->alloc);
88 device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,
89 &state->alloc);
90
91 /* Reset for next finish. */
92 memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));
93 }
94
95 /*
96 * This take a surface, but the only things used are:
97 * - BPE
98 * - DCC equations
99 * - DCC block size
100 *
101 * BPE is always 4 at the moment and the rest is derived from the tilemode.
102 */
103 static VkResult
radv_device_init_meta_dcc_retile_state(struct radv_device * device,struct radeon_surf * surf)104 radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf)
105 {
106 VkResult result = VK_SUCCESS;
107 nir_shader *cs = build_dcc_retile_compute_shader(device, surf);
108
109 VkDescriptorSetLayoutCreateInfo ds_create_info = {.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
110 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
111 .bindingCount = 2,
112 .pBindings = (VkDescriptorSetLayoutBinding[]){
113 {.binding = 0,
114 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
115 .descriptorCount = 1,
116 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
117 .pImmutableSamplers = NULL},
118 {.binding = 1,
119 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
120 .descriptorCount = 1,
121 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
122 .pImmutableSamplers = NULL},
123 }};
124
125 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
126 &device->meta_state.dcc_retile.ds_layout);
127 if (result != VK_SUCCESS)
128 goto cleanup;
129
130 VkPipelineLayoutCreateInfo pl_create_info = {
131 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
132 .setLayoutCount = 1,
133 .pSetLayouts = &device->meta_state.dcc_retile.ds_layout,
134 .pushConstantRangeCount = 1,
135 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
136 };
137
138 result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
139 &device->meta_state.dcc_retile.p_layout);
140 if (result != VK_SUCCESS)
141 goto cleanup;
142
143 /* compute shader */
144
145 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
146 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
147 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
148 .module = vk_shader_module_handle_from_nir(cs),
149 .pName = "main",
150 .pSpecializationInfo = NULL,
151 };
152
153 VkComputePipelineCreateInfo vk_pipeline_info = {
154 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
155 .stage = pipeline_shader_stage,
156 .flags = 0,
157 .layout = device->meta_state.dcc_retile.p_layout,
158 };
159
160 result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
161 NULL, &device->meta_state.dcc_retile.pipeline[surf->u.gfx9.swizzle_mode]);
162 if (result != VK_SUCCESS)
163 goto cleanup;
164
165 cleanup:
166 ralloc_free(cs);
167 return result;
168 }
169
170 void
radv_retile_dcc(struct radv_cmd_buffer * cmd_buffer,struct radv_image * image)171 radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)
172 {
173 struct radv_meta_saved_state saved_state;
174 struct radv_device *device = cmd_buffer->device;
175 struct radv_buffer buffer;
176
177 assert(image->vk.image_type == VK_IMAGE_TYPE_2D);
178 assert(image->vk.array_layers == 1 && image->vk.mip_levels == 1);
179
180 struct radv_cmd_state *state = &cmd_buffer->state;
181
182 state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, image) |
183 radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
184
185 unsigned swizzle_mode = image->planes[0].surface.u.gfx9.swizzle_mode;
186
187 /* Compile pipelines if not already done so. */
188 if (!cmd_buffer->device->meta_state.dcc_retile.pipeline[swizzle_mode]) {
189 VkResult ret = radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);
190 if (ret != VK_SUCCESS) {
191 vk_command_buffer_set_error(&cmd_buffer->vk, ret);
192 return;
193 }
194 }
195
196 radv_meta_save(&saved_state, cmd_buffer,
197 RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);
198
199 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
200 device->meta_state.dcc_retile.pipeline[swizzle_mode]);
201
202 radv_buffer_init(&buffer, device, image->bindings[0].bo, image->size, image->bindings[0].offset);
203
204 struct radv_buffer_view views[2];
205 VkBufferView view_handles[2];
206 radv_buffer_view_init(views, cmd_buffer->device,
207 &(VkBufferViewCreateInfo){
208 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
209 .buffer = radv_buffer_to_handle(&buffer),
210 .offset = image->planes[0].surface.meta_offset,
211 .range = image->planes[0].surface.meta_size,
212 .format = VK_FORMAT_R8_UINT,
213 });
214 radv_buffer_view_init(views + 1, cmd_buffer->device,
215 &(VkBufferViewCreateInfo){
216 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
217 .buffer = radv_buffer_to_handle(&buffer),
218 .offset = image->planes[0].surface.display_dcc_offset,
219 .range = image->planes[0].surface.u.gfx9.color.display_dcc_size,
220 .format = VK_FORMAT_R8_UINT,
221 });
222 for (unsigned i = 0; i < 2; ++i)
223 view_handles[i] = radv_buffer_view_to_handle(&views[i]);
224
225 radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.dcc_retile.p_layout,
226 0, /* set */
227 2, /* descriptorWriteCount */
228 (VkWriteDescriptorSet[]){
229 {
230 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
231 .dstBinding = 0,
232 .dstArrayElement = 0,
233 .descriptorCount = 1,
234 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
235 .pTexelBufferView = &view_handles[0],
236 },
237 {
238 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
239 .dstBinding = 1,
240 .dstArrayElement = 0,
241 .descriptorCount = 1,
242 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
243 .pTexelBufferView = &view_handles[1],
244 },
245 });
246
247 unsigned width = DIV_ROUND_UP(image->vk.extent.width, vk_format_get_blockwidth(image->vk.format));
248 unsigned height = DIV_ROUND_UP(image->vk.extent.height, vk_format_get_blockheight(image->vk.format));
249
250 unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);
251 unsigned dcc_height = DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);
252
253 uint32_t constants[] = {
254 image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,
255 image->planes[0].surface.u.gfx9.color.dcc_height,
256 image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,
257 image->planes[0].surface.u.gfx9.color.display_dcc_height,
258 };
259 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.dcc_retile.p_layout,
260 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, constants);
261
262 radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);
263
264 radv_buffer_view_finish(views);
265 radv_buffer_view_finish(views + 1);
266 radv_buffer_finish(&buffer);
267
268 radv_meta_restore(&saved_state, cmd_buffer);
269
270 state->flush_bits |=
271 RADV_CMD_FLAG_CS_PARTIAL_FLUSH | radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
272 }
273