/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "vk_common_entrypoints.h"

/*
 * GFX queue: compute-shader implementation of image->buffer copies.
 * Compute queue: also implements buffer->image and image->image copies, and image clears.
 */

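/*
 * Push-constant layouts used by the shaders below. This is a summary derived
 * from the nir_load_push_constant offsets; the structs are illustrative, not
 * part of any API:
 *
 *    itob/btoi:        { int offset[2|3]; int stride; }           (16 bytes)
 *    btoi_r32g32b32:   { int offset[2]; int pitch; int stride; }  (16 bytes)
 *    itoi:             { int src_offset[3]; int dst_offset[3]; }  (24 bytes)
 *    itoi_r32g32b32:   { int src_offset[2]; int src_stride;
 *                        int dst_offset[2]; int dst_stride; }     (24 bytes)
 *    cleari:           { float clear_val[4]; int layer; }         (20 bytes)
 *    cleari_r32g32b32: { float clear_val[3]; int stride; }        (16 bytes)
 */
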
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);
   nir_def *outval =
      nir_txf_deref(&b, nir_build_deref_var(&b, input_img), nir_trim_vector(&b, img_coord, 2 + is_3d), NULL);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

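   /* Linear texel index into the destination buffer: y * stride + x,
    * replicated into the vec4 coordinate that image intrinsics expect. */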
   nir_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_def *coord = nir_replicate(&b, tmp, 4);

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);

   return b.shader;
}

/* Image to buffer copy: fetch from the source image, write through a storage texel buffer. */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_itob_compute_shader(device, false);
   nir_shader *cs_3d = build_nir_itob_compute_shader(device, true);

   /*
    * Two descriptors: one for the image being sampled,
    * one for the buffer being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.itob.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.itob.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.itob.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.itob.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, &device->meta_state.itob.pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs_3d),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info_3d = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage_3d,
      .flags = 0,
      .layout = device->meta_state.itob.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
                                         NULL, &device->meta_state.itob.pipeline_3d);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs_3d);
   ralloc_free(cs);

   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   ralloc_free(cs_3d);
   return result;
}

static void
radv_device_finish_meta_itob_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
}

static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

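   /* buf_coord is the linear source index (y * stride + x); coord is the
    * destination texel in the image, offset by the push-constant origin. */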
   nir_def *coord = nir_iadd(&b, global_id, offset);
   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),
                                 is_3d ? nir_channel(&b, coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_undef(&b, 1, 32), outval,
                         nir_imm_int(&b, 0), .image_dim = dim);

   return b.shader;
}

/* Buffer to image copy: read from a texel buffer, write through a storage image. */
static VkResult
radv_device_init_meta_btoi_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_btoi_compute_shader(device, false);
   nir_shader *cs_3d = build_nir_btoi_compute_shader(device, true);
   /*
    * Two descriptors: one for the buffer being read,
    * one for the image being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.btoi.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.btoi.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.btoi.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, &device->meta_state.btoi.pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs_3d),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info_3d = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage_3d,
      .flags = 0,
      .layout = device->meta_state.btoi.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
                                         NULL, &device->meta_state.btoi.pipeline_3d);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs_3d);
   ralloc_free(cs);

   return VK_SUCCESS;
fail:
   ralloc_free(cs_3d);
   ralloc_free(cs);
   return result;
}

static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
                                                        &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
}

/* Buffer to image - special path for R32G32B32 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_btoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);
   nir_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *pos_x = nir_channel(&b, global_id, 0);
   nir_def *pos_y = nir_channel(&b, global_id, 1);

   nir_def *buf_coord = nir_imul(&b, pos_y, stride);
   buf_coord = nir_iadd(&b, buf_coord, pos_x);

   nir_def *img_coord = nir_iadd(&b, global_id, offset);

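   /* Each R32G32B32 texel occupies three R32 texels in the destination view,
    * so the x coordinate is scaled by 3; pitch is the destination row stride
    * in R32 units. */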
   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
                                  nir_imul_imm(&b, nir_channel(&b, img_coord, 0), 3));

   nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), buf_coord, NULL);

   for (int chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.btoi_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.btoi_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, &device->meta_state.btoi_r32g32b32.pipeline);

fail:
   ralloc_free(cs);
   return result;
}

static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->btoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline, &state->alloc);
}

static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                 : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);

   nir_def *src_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = is_3d ? 12 : 8);
   nir_def *dst_offset = nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = is_3d ? 24 : 20);

   nir_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_deref_instr *input_img_deref = nir_build_deref_var(&b, input_img);

   nir_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

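   /* Fetch every sample of the source texel up front; tex_vals is sized for
    * the maximum of 8 samples supported here. */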
   nir_def *tex_vals[8];
   if (is_multisampled) {
      for (uint32_t i = 0; i < samples; i++) {
         tex_vals[i] = nir_txf_ms_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2), nir_imm_int(&b, i));
      }
   } else {
      tex_vals[0] = nir_txf_deref(&b, input_img_deref, nir_trim_vector(&b, src_coord, 2 + is_3d), nir_imm_int(&b, 0));
   }

   nir_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0), nir_channel(&b, dst_coord, 1),
                                 is_3d ? nir_channel(&b, dst_coord, 2) : nir_undef(&b, 1, 32), nir_undef(&b, 1, 32));

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, img_coord, nir_imm_int(&b, i), tex_vals[i],
                            nir_imm_int(&b, 0), .image_dim = dim);
   }

   return b.shader;
}

static VkResult
create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
   struct radv_meta_state *state = &device->meta_state;
   nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
   VkResult result;

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = state->itoi.img_p_layout,
   };

   result =
      radv_compute_pipeline_create(radv_device_to_handle(device), state->cache, &vk_pipeline_info, NULL, pipeline);
   ralloc_free(cs);
   return result;
}

/* Image to image copy: fetch from the source image, write through a storage image. */
static VkResult
radv_device_init_meta_itoi_state(struct radv_device *device)
{
   VkResult result;

   /*
    * Two descriptors: one for the image being sampled,
    * one for the image being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.itoi.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.itoi.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
      if (result != VK_SUCCESS)
         goto fail;
   }

   nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);

   VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs_3d),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info_3d = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage_3d,
      .flags = 0,
      .layout = device->meta_state.itoi.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
                                         NULL, &device->meta_state.itoi.pipeline_3d);
   ralloc_free(cs_3d);
   if (result != VK_SUCCESS)
      goto fail;

   return VK_SUCCESS;
fail:
   return result;
}

static void
radv_device_finish_meta_itoi_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
}

static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_itoi_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "output_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_def *global_id = get_global_ids(&b, 2);

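   /* The third channel of each offset push constant carries that surface's
    * row stride in R32 units. */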
   nir_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);

   nir_def *src_stride = nir_channel(&b, src_offset, 2);
   nir_def *dst_stride = nir_channel(&b, dst_offset, 2);

   nir_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
   nir_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);

   nir_def *src_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
                                      nir_imul_imm(&b, nir_channel(&b, src_img_coord, 0), 3));

   nir_def *dst_global_pos = nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
                                      nir_imul_imm(&b, nir_channel(&b, dst_img_coord, 0), 3));

   for (int chan = 0; chan < 3; chan++) {
      /* src */
      nir_def *src_local_pos = nir_iadd_imm(&b, src_global_pos, chan);
      nir_def *outval = nir_txf_deref(&b, nir_build_deref_var(&b, input_img), src_local_pos, NULL);

      /* dst */
      nir_def *dst_local_pos = nir_iadd_imm(&b, dst_global_pos, chan);

      nir_def *dst_coord = nir_replicate(&b, dst_local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, dst_coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, outval, 0), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

/* Image to image - special path for R32G32B32 */
static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.itoi_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.itoi_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, &device->meta_state.itoi_r32g32b32.pipeline);

fail:
   ralloc_free(cs);
   return result;
}

static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->itoi_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline, &state->alloc);
}

static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                 : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
   nir_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);

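   /* Build a vec4 coordinate from the 2D invocation id plus the layer push
    * constant (array layer or 3D slice); the last component is unused. */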
   nir_def *comps[4];
   comps[0] = nir_channel(&b, global_id, 0);
   comps[1] = nir_channel(&b, global_id, 1);
   comps[2] = layer;
   comps[3] = nir_undef(&b, 1, 32);
   global_id = nir_vec(&b, comps, 4);

   for (uint32_t i = 0; i < samples; i++) {
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, global_id, nir_imm_int(&b, i), clear_val,
                            nir_imm_int(&b, 0), .image_dim = dim);
   }

   return b.shader;
}

static VkResult
create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
{
   nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
   VkResult result;

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.cleari.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, pipeline);
   ralloc_free(cs);
   return result;
}

static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
   VkResult result;

   /*
    * One descriptor for the image being cleared.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 1,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.cleari.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.cleari.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
      uint32_t samples = 1 << i;
      result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
      if (result != VK_SUCCESS)
         goto fail;
   }

   nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs_3d),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info_3d = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage_3d,
      .flags = 0,
      .layout = device->meta_state.cleari.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info_3d,
                                         NULL, &device->meta_state.cleari.pipeline_3d);
   ralloc_free(cs_3d);
   if (result != VK_SUCCESS)
      goto fail;

   return VK_SUCCESS;
fail:
   return result;
}

static void
radv_device_finish_meta_cleari_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
                                                        &state->alloc);

   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
}

/* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_cleari_r32g32b32_cs");
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_image, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 0;

   nir_def *global_id = get_global_ids(&b, 2);

   nir_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 12);
   nir_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_def *global_x = nir_channel(&b, global_id, 0);
   nir_def *global_y = nir_channel(&b, global_id, 1);

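   /* Linear index of the texel's first channel: each R32G32B32 texel spans
    * three R32 texels, so step x by 3. */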
   nir_def *global_pos = nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul_imm(&b, global_x, 3));

   for (unsigned chan = 0; chan < 3; chan++) {
      nir_def *local_pos = nir_iadd_imm(&b, global_pos, chan);

      nir_def *coord = nir_replicate(&b, local_pos, 4);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->def, coord, nir_undef(&b, 1, 32),
                            nir_channel(&b, clear_val, chan), nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
   }

   return b.shader;
}

static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 1,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
                                           &device->meta_state.cleari_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
                                      &device->meta_state.cleari_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
   };

   result = radv_compute_pipeline_create(radv_device_to_handle(device), device->meta_state.cache, &vk_pipeline_info,
                                         NULL, &device->meta_state.cleari_r32g32b32.pipeline);

fail:
   ralloc_free(cs);
   return result;
}

static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;

   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout, &state->alloc);
   device->vk.dispatch_table.DestroyDescriptorSetLayout(radv_device_to_handle(device),
                                                        state->cleari_r32g32b32.img_ds_layout, &state->alloc);
   radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline, &state->alloc);
}

void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
   radv_device_finish_meta_itob_state(device);
   radv_device_finish_meta_btoi_state(device);
   radv_device_finish_meta_btoi_r32g32b32_state(device);
   radv_device_finish_meta_itoi_state(device);
   radv_device_finish_meta_itoi_r32g32b32_state(device);
   radv_device_finish_meta_cleari_state(device);
   radv_device_finish_meta_cleari_r32g32b32_state(device);
}

VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
   VkResult result;

   result = radv_device_init_meta_itob_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_btoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_itoi_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_state(device);
   if (result != VK_SUCCESS)
      return result;

   result = radv_device_init_meta_cleari_r32g32b32_state(device);
   if (result != VK_SUCCESS)
      return result;

   return VK_SUCCESS;
}

static void
create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf, struct radv_image_view *iview,
             VkFormat format, VkImageAspectFlagBits aspects)
{
   if (format == VK_FORMAT_UNDEFINED)
      format = surf->format;

   radv_image_view_init(iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(surf->image),
                           .viewType = radv_meta_get_view_type(surf->image),
                           .format = format,
                           .subresourceRange = {.aspectMask = aspects,
                                                .baseMipLevel = surf->level,
                                                .levelCount = 1,
                                                .baseArrayLayer = surf->layer,
                                                .layerCount = 1},
                        },
                        0,
                        &(struct radv_image_view_extra_create_info){
                           .disable_compression = surf->disable_compression,
                        });
}

static void
create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset, VkFormat format,
             struct radv_buffer_view *bview)
{
   radv_buffer_view_init(bview, cmd_buffer->device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

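/* Create a VkBuffer bound to the image's backing memory, so that the
 * R32G32B32 paths can alias a linear image through texel-buffer views. */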
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
                         VkBufferUsageFlagBits2KHR usage, VkBuffer *buffer)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_device_memory mem;

   radv_device_memory_init(&mem, device, surf->image->bindings[0].bo);

   radv_create_buffer(device,
                      &(VkBufferCreateInfo){
                         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
                         .pNext =
                            &(VkBufferUsageFlags2CreateInfoKHR){
                               .sType = VK_STRUCTURE_TYPE_BUFFER_USAGE_FLAGS_2_CREATE_INFO_KHR,
                               .usage = usage,
                            },
                         .flags = 0,
                         .size = surf->image->size,
                         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
                      },
                      NULL, buffer, true);

   radv_BindBufferMemory2(radv_device_to_handle(device), 1,
                          (VkBindBufferMemoryInfo[]){{
                             .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
                             .buffer = *buffer,
                             .memory = radv_device_memory_to_handle(&mem),
                             .memoryOffset = surf->image->bindings[0].offset,
                          }});

   radv_device_memory_finish(&mem);
}

static void
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
                           VkFormat src_format, struct radv_buffer_view *bview)
{
   VkFormat format;

   switch (src_format) {
   case VK_FORMAT_R32G32B32_UINT:
      format = VK_FORMAT_R32_UINT;
      break;
   case VK_FORMAT_R32G32B32_SINT:
      format = VK_FORMAT_R32_SINT;
      break;
   case VK_FORMAT_R32G32B32_SFLOAT:
      format = VK_FORMAT_R32_SFLOAT;
      break;
   default:
      unreachable("invalid R32G32B32 format");
   }

   radv_buffer_view_init(bview, cmd_buffer->device,
                         &(VkBufferViewCreateInfo){
                            .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
                            .flags = 0,
                            .buffer = radv_buffer_to_handle(buffer),
                            .format = format,
                            .offset = offset,
                            .range = VK_WHOLE_SIZE,
                         });
}

/* GFX9+ has an issue where the HW does not calculate mipmap degradations
 * for block-compressed images correctly (see the comment in
 * radv_image_view_init). Some texels are unaddressable and cannot be copied
 * to/from by a compute shader. Here we will perform a buffer copy to copy the
 * texels that the hardware missed.
 *
 * GFX10 will not use this workaround because it can be fixed by adjusting its
 * image view descriptors instead.
 */
static void
fixup_gfx9_cs_copy(struct radv_cmd_buffer *cmd_buffer, const struct radv_meta_blit2d_buffer *buf_bsurf,
                   const struct radv_meta_blit2d_surf *img_bsurf, const struct radv_meta_blit2d_rect *rect,
                   bool to_image)
{
   const unsigned mip_level = img_bsurf->level;
   const struct radv_image *image = img_bsurf->image;
   const struct radeon_surf *surf = &image->planes[0].surface;
   struct radv_device *device = cmd_buffer->device;
   const struct radeon_info *rad_info = &device->physical_device->rad_info;
   struct ac_addrlib *addrlib = device->ws->get_addrlib(device->ws);
   struct ac_surf_info surf_info = radv_get_ac_surf_info(device, image);

   /* GFX10+ handles 2D images with the adjusted-descriptor workaround instead,
    * so skip them here. */
   if (rad_info->gfx_level < GFX9 || (rad_info->gfx_level >= GFX10 && image->vk.image_type == VK_IMAGE_TYPE_2D) ||
       image->vk.mip_levels == 1 || !vk_format_is_block_compressed(image->vk.format))
      return;

   /* The physical extent of the base mip */
   VkExtent2D hw_base_extent = {surf->u.gfx9.base_mip_width, surf->u.gfx9.base_mip_height};

   /* The hardware-calculated extent of the selected mip
    * (naive divide-by-two integer math)
    */
   VkExtent2D hw_mip_extent = {radv_minify(hw_base_extent.width, mip_level),
                               radv_minify(hw_base_extent.height, mip_level)};

   /* The actual extent we want to copy */
   VkExtent2D mip_extent = {rect->width, rect->height};

   VkOffset2D mip_offset = {to_image ? rect->dst_x : rect->src_x, to_image ? rect->dst_y : rect->src_y};

   if (hw_mip_extent.width >= mip_offset.x + mip_extent.width &&
       hw_mip_extent.height >= mip_offset.y + mip_extent.height)
      return;

   if (!to_image) {
      /* If we are writing to a buffer, then we need to wait for the compute
       * shader to finish because it may write over the unaddressable texels
       * while we're fixing them. If we're writing to an image, we do not need
       * to wait because the compute shader cannot write to those texels.
       */
      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_INV_VCACHE;
   }

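   /* Copy the missed texels one at a time, using addrlib to compute each
    * texel's byte offset within the image. */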
   for (uint32_t y = 0; y < mip_extent.height; y++) {
      uint32_t coordY = y + mip_offset.y;
      /* If the default copy algorithm (done previously) has already seen this
       * scanline, then we can bias the starting X coordinate over to skip the
       * region already copied by the default copy.
       */
      uint32_t x = (coordY < hw_mip_extent.height) ? hw_mip_extent.width : 0;
      for (; x < mip_extent.width; x++) {
         uint32_t coordX = x + mip_offset.x;
         uint64_t addr = ac_surface_addr_from_coord(addrlib, rad_info, surf, &surf_info, mip_level, coordX, coordY,
                                                    img_bsurf->layer, image->vk.image_type == VK_IMAGE_TYPE_3D);
         struct radeon_winsys_bo *img_bo = image->bindings[0].bo;
         struct radeon_winsys_bo *mem_bo = buf_bsurf->buffer->bo;
         const uint64_t img_offset = image->bindings[0].offset + addr;
         /* buf_bsurf->offset already includes the layer offset */
         const uint64_t mem_offset =
            buf_bsurf->buffer->offset + buf_bsurf->offset + y * buf_bsurf->pitch * surf->bpe + x * surf->bpe;
         if (to_image) {
            radv_copy_buffer(cmd_buffer, mem_bo, img_bo, mem_offset, img_offset, surf->bpe);
         } else {
            radv_copy_buffer(cmd_buffer, img_bo, mem_bo, img_offset, mem_offset, surf->bpe);
         }
      }
   }
}

static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf)
{
   unsigned stride;

   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
      stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
   } else {
      stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
   }

   return stride;
}

static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
          .dstBinding = 0,
          .dstArrayElement = 0,
          .descriptorCount = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .pImageInfo =
             (VkDescriptorImageInfo[]){
                {
                   .sampler = VK_NULL_HANDLE,
                   .imageView = radv_image_view_to_handle(src),
                   .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                },
             }},
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
         }});
}

void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                          struct radv_meta_blit2d_buffer *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view src_view;
   struct radv_buffer_view dst_view;

   create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
   create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
   itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   if (src->image->vk.image_type == VK_IMAGE_TYPE_3D)
      pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

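   /* Push constants match the itob shader: the source texel offset at bytes
    * 0-7 (the layer/slice at bytes 8-11 is only read by the 3D variant) and
    * the destination row pitch at byte 12. */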
   for (unsigned r = 0; r < num_rects; ++r) {
      unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itob.img_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
      fixup_gfx9_cs_copy(cmd_buffer, dst, src, &rects[r], false);
   }

   radv_image_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
}

static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 0,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
         },
         {
            .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
            .dstBinding = 1,
            .dstArrayElement = 0,
            .descriptorCount = 1,
            .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
            .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
         }});
}
1345
1346 static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1347 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
1348 struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1349 struct radv_meta_blit2d_rect *rects)
1350 {
1351 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1352 struct radv_device *device = cmd_buffer->device;
1353 struct radv_buffer_view src_view, dst_view;
1354 unsigned dst_offset = 0;
1355 unsigned stride;
1356 VkBuffer buffer;
1357
1358 /* This special btoi path for R32G32B32 formats will write the linear
1359 * image as a buffer with the same underlying memory. The compute
1360 * shader will copy all components separately using a R32 format.
1361 */
1362 create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);
1363
1364 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1365 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format, &dst_view);
1366 btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1367
1368 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1369
1370 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1371
1372 for (unsigned r = 0; r < num_rects; ++r) {
1373 unsigned push_constants[4] = {
1374 rects[r].dst_x,
1375 rects[r].dst_y,
1376 stride,
1377 src->pitch,
1378 };
1379
1380 vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi_r32g32b32.img_p_layout,
1381 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1382
1383 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1384 }
1385
1386 radv_buffer_view_finish(&src_view);
1387 radv_buffer_view_finish(&dst_view);
1388 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1389 }
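
/* Illustrative sketch, not driver code: the R32 element addressing this path
 * is assumed to rely on. A linear R32G32B32 texel spans three consecutive R32
 * elements, so component `comp` of texel (x, y) maps as below, with `stride`
 * being the row pitch in 32-bit texels as returned by
 * get_image_stride_for_r32g32b32().
 */
#if 0
static unsigned
r32g32b32_elem(unsigned x, unsigned y, unsigned comp, unsigned stride)
{
   /* Row base in R32 elements, plus three elements per texel, plus the component. */
   return y * stride + x * 3 + comp;
}
#endif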

static void
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dst),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});
}

void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_buffer *src,
                             struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view src_view;
   struct radv_image_view dst_view;

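   /* Assumption based on the buffer path's own comment: 96-bit R32G32B32
    * images are linear and cannot be written through the storage image
    * descriptor used by the regular path, so route them through the
    * buffer-based variant.
    */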
   if (dst->image->vk.format == VK_FORMAT_R32G32B32_UINT || dst->image->vk.format == VK_FORMAT_R32G32B32_SINT ||
       dst->image->vk.format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
      return;
   }

   create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
   create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
      pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   for (unsigned r = 0; r < num_rects; ++r) {
      unsigned push_constants[4] = {
         rects[r].dst_x,
         rects[r].dst_y,
         dst->layer,
         src->pitch,
      };
      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.btoi.img_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
      fixup_gfx9_cs_copy(cmd_buffer, src, dst, &rects[r], true);
   }

   radv_image_view_finish(&dst_view);
   radv_buffer_view_finish(&src_view);
}

static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
                                struct radv_buffer_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout, 0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 0,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
                               },
                               {
                                  .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                  .dstBinding = 1,
                                  .dstArrayElement = 0,
                                  .descriptorCount = 1,
                                  .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                  .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
                               }});
}

static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                                      struct radv_meta_blit2d_surf *dst, unsigned num_rects,
                                      struct radv_meta_blit2d_rect *rects)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view src_view, dst_view;
   unsigned src_offset = 0, dst_offset = 0;
   unsigned src_stride, dst_stride;
   VkBuffer src_buffer, dst_buffer;

   /* 96-bit formats are only compatible with themselves. */
   assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
          dst->format == VK_FORMAT_R32G32B32_SFLOAT);

   /* This special itoi path for R32G32B32 formats writes both linear images
    * as buffers with the same underlying memory. The compute shader copies
    * all components separately using an R32 format (see the sketch after
    * this function).
    */
   create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_2_UNIFORM_TEXEL_BUFFER_BIT_KHR, &src_buffer);
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &dst_buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset, src->format, &src_view);
   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset, dst->format, &dst_view);
   itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
   dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   for (unsigned r = 0; r < num_rects; ++r) {
      unsigned push_constants[6] = {
         rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
      };
      vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi_r32g32b32.img_p_layout,
                                 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

      radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
   }

   radv_buffer_view_finish(&src_view);
   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
   radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}
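
/* Illustrative sketch, not driver code: the per-texel copy the itoi R32G32B32
 * shader is assumed to perform, with both images viewed as R32 buffers and
 * both strides given in 32-bit texels (matching the push constants above).
 */
#if 0
static void
copy_r32g32b32_texel(uint32_t *dst, const uint32_t *src, unsigned dst_x, unsigned dst_y, unsigned dst_stride,
                     unsigned src_x, unsigned src_y, unsigned src_stride)
{
   /* Move the three 32-bit components of one texel, one R32 element at a time. */
   for (unsigned c = 0; c < 3; ++c)
      dst[dst_y * dst_stride + dst_x * 3 + c] = src[src_y * src_stride + src_x * 3 + c];
}
#endif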

static void
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src, struct radv_image_view *dst)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout,
                                 0, /* set */
                                 2, /* descriptorWriteCount */
                                 (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 0,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                                           .pImageInfo =
                                                              (VkDescriptorImageInfo[]){
                                                                 {
                                                                    .sampler = VK_NULL_HANDLE,
                                                                    .imageView = radv_image_view_to_handle(src),
                                                                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                                 },
                                                              }},
                                                          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                                           .dstBinding = 1,
                                                           .dstArrayElement = 0,
                                                           .descriptorCount = 1,
                                                           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                                           .pImageInfo = (VkDescriptorImageInfo[]){
                                                              {
                                                                 .sampler = VK_NULL_HANDLE,
                                                                 .imageView = radv_image_view_to_handle(dst),
                                                                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                                              },
                                                           }}});
}

void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
                            struct radv_meta_blit2d_surf *dst, unsigned num_rects, struct radv_meta_blit2d_rect *rects)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view src_view, dst_view;
   uint32_t samples = src->image->vk.samples;
   uint32_t samples_log2 = ffs(samples) - 1;

   if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
       src->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
      return;
   }

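   /* Copy one aspect at a time: depth and stencil need single-aspect view
    * formats, so a combined depth/stencil image takes one pass per aspect.
    */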
   u_foreach_bit (i, dst->aspect_mask) {
      unsigned aspect_mask = 1u << i;
      VkFormat depth_format = 0;
      if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
         depth_format = vk_format_stencil_only(dst->image->vk.format);
      else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
         depth_format = vk_format_depth_only(dst->image->vk.format);

      create_iview(cmd_buffer, src, &src_view, depth_format, aspect_mask);
      create_iview(cmd_buffer, dst, &dst_view, depth_format, aspect_mask);

      itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

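      /* The MSAA pipelines are indexed by log2(sample count); 3D copies use
       * a dedicated variant since 3D images are always single-sampled.
       */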
      VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
      if (src->image->vk.image_type == VK_IMAGE_TYPE_3D || dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
         pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
      radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

      for (unsigned r = 0; r < num_rects; ++r) {
         unsigned push_constants[6] = {
            rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
         };
         vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.itoi.img_p_layout,
                                    VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);

         radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
      }

      radv_image_view_finish(&src_view);
      radv_image_view_finish(&dst_view);
   }
}

static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
                                 device->meta_state.cleari_r32g32b32.img_p_layout, 0, /* set */
                                 1, /* descriptorWriteCount */
                                 (VkWriteDescriptorSet[]){{
                                    .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                    .dstBinding = 0,
                                    .dstArrayElement = 0,
                                    .descriptorCount = 1,
                                    .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
                                    .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
                                 }});
}

static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                                   const VkClearColorValue *clear_color)
{
   VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
   struct radv_device *device = cmd_buffer->device;
   struct radv_buffer_view dst_view;
   unsigned stride;
   VkBuffer buffer;

   /* This special clear path for R32G32B32 formats writes the linear image
    * as a buffer with the same underlying memory. The compute shader clears
    * all components separately using an R32 format (see the sketch after
    * this function).
    */
   create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_2_STORAGE_TEXEL_BUFFER_BIT_KHR, &buffer);

   create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format, &dst_view);
   cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

   stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

   unsigned push_constants[4] = {
      clear_color->uint32[0],
      clear_color->uint32[1],
      clear_color->uint32[2],
      stride,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari_r32g32b32.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_buffer_view_finish(&dst_view);
   radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
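
/* Illustrative sketch, not driver code: the per-texel clear the cleari
 * R32G32B32 shader is assumed to perform on the R32 view of the linear
 * image, using the same element addressing as the copy sketches above.
 */
#if 0
static void
clear_r32g32b32_texel(uint32_t *dst, unsigned x, unsigned y, unsigned stride, const uint32_t color[3])
{
   /* Write the three clear components into the texel's R32 elements. */
   for (unsigned c = 0; c < 3; ++c)
      dst[y * stride + x * 3 + c] = color[c];
}
#endif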

static void
cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
{
   struct radv_device *device = cmd_buffer->device;

   radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari.img_p_layout,
                                 0, /* set */
                                 1, /* descriptorWriteCount */
                                 (VkWriteDescriptorSet[]){
                                    {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                     .dstBinding = 0,
                                     .dstArrayElement = 0,
                                     .descriptorCount = 1,
                                     .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                     .pImageInfo =
                                        (VkDescriptorImageInfo[]){
                                           {
                                              .sampler = VK_NULL_HANDLE,
                                              .imageView = radv_image_view_to_handle(dst_iview),
                                              .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                           },
                                        }},
                                 });
}

void
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
                         const VkClearColorValue *clear_color)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view dst_iview;
   uint32_t samples = dst->image->vk.samples;
   uint32_t samples_log2 = ffs(samples) - 1;

   if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
       dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
      radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
      return;
   }

   create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
   cleari_bind_descriptors(cmd_buffer, &dst_iview);

   VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
   if (dst->image->vk.image_type == VK_IMAGE_TYPE_3D)
      pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

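   /* Push-constant layout consumed by the cleari shader: the RGBA clear
    * value at bytes 0-15 followed by the destination base layer at byte 16,
    * 20 bytes in total.
    */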
   unsigned push_constants[5] = {
      clear_color->uint32[0], clear_color->uint32[1], clear_color->uint32[2], clear_color->uint32[3], dst->layer,
   };

   vk_common_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), device->meta_state.cleari.img_p_layout,
                              VK_SHADER_STAGE_COMPUTE_BIT, 0, 20, push_constants);

   radv_unaligned_dispatch(cmd_buffer, dst->image->vk.extent.width, dst->image->vk.extent.height, 1);

   radv_image_view_finish(&dst_iview);
}