1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26
27 /*
28 * GFX queue: Compute shader implementation of image->buffer copy
29 * Compute queue: implementation also of buffer->image, image->image, and image clear.
30 */
31
/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader
 * builders below take an is_3d option to select it.
 */
35 static nir_shader *
build_nir_itob_compute_shader(struct radv_device * dev,bool is_3d)36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
37 {
38 nir_builder b;
39 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
40 const struct glsl_type *sampler_type = glsl_sampler_type(dim,
41 false,
42 false,
43 GLSL_TYPE_FLOAT);
44 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
45 false,
46 GLSL_TYPE_FLOAT);
47 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
48 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
49 b.shader->info.cs.local_size[0] = 16;
50 b.shader->info.cs.local_size[1] = 16;
51 b.shader->info.cs.local_size[2] = 1;
52 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
53 sampler_type, "s_tex");
54 input_img->data.descriptor_set = 0;
55 input_img->data.binding = 0;
56
57 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
58 img_type, "out_img");
59 output_img->data.descriptor_set = 0;
60 output_img->data.binding = 1;
61
62 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
63 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
64 nir_ssa_def *block_size = nir_imm_ivec4(&b,
65 b.shader->info.cs.local_size[0],
66 b.shader->info.cs.local_size[1],
67 b.shader->info.cs.local_size[2], 0);
68
69 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
70
71
72
73 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
74 nir_intrinsic_set_base(offset, 0);
75 nir_intrinsic_set_range(offset, 16);
76 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
77 offset->num_components = is_3d ? 3 : 2;
78 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
79 nir_builder_instr_insert(&b, &offset->instr);
80
81 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
82 nir_intrinsic_set_base(stride, 0);
83 nir_intrinsic_set_range(stride, 16);
84 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
85 stride->num_components = 1;
86 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
87 nir_builder_instr_insert(&b, &stride->instr);
88
89 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
90 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
91
92 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
93 tex->sampler_dim = dim;
94 tex->op = nir_texop_txf;
95 tex->src[0].src_type = nir_tex_src_coord;
96 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
97 tex->src[1].src_type = nir_tex_src_lod;
98 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
99 tex->src[2].src_type = nir_tex_src_texture_deref;
100 tex->src[2].src = nir_src_for_ssa(input_img_deref);
101 tex->dest_type = nir_type_float;
102 tex->is_array = false;
103 tex->coord_components = is_3d ? 3 : 2;
104
105 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
106 nir_builder_instr_insert(&b, &tex->instr);
107
108 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
109 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
110
111 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
112 tmp = nir_iadd(&b, tmp, pos_x);
113
114 nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
115
116 nir_ssa_def *outval = &tex->dest.ssa;
117 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
118 store->num_components = 4;
119 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
120 store->src[1] = nir_src_for_ssa(coord);
121 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
122 store->src[3] = nir_src_for_ssa(outval);
123 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
124
125 nir_builder_instr_insert(&b, &store->instr);
126 return b.shader;
127 }
128
129 /* Image to buffer - don't write use image accessors */
130 static VkResult
radv_device_init_meta_itob_state(struct radv_device * device)131 radv_device_init_meta_itob_state(struct radv_device *device)
132 {
133 VkResult result;
134 struct radv_shader_module cs = { .nir = NULL };
135 struct radv_shader_module cs_3d = { .nir = NULL };
136
137 cs.nir = build_nir_itob_compute_shader(device, false);
138 if (device->physical_device->rad_info.chip_class >= GFX9)
139 cs_3d.nir = build_nir_itob_compute_shader(device, true);
140
141 /*
142 * two descriptors one for the image being sampled
143 * one for the buffer being written.
144 */
145 VkDescriptorSetLayoutCreateInfo ds_create_info = {
146 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
147 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
148 .bindingCount = 2,
149 .pBindings = (VkDescriptorSetLayoutBinding[]) {
150 {
151 .binding = 0,
152 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
153 .descriptorCount = 1,
154 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
155 .pImmutableSamplers = NULL
156 },
157 {
158 .binding = 1,
159 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
160 .descriptorCount = 1,
161 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
162 .pImmutableSamplers = NULL
163 },
164 }
165 };
166
167 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
168 &ds_create_info,
169 &device->meta_state.alloc,
170 &device->meta_state.itob.img_ds_layout);
171 if (result != VK_SUCCESS)
172 goto fail;
173
174
175 VkPipelineLayoutCreateInfo pl_create_info = {
176 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 .setLayoutCount = 1,
178 .pSetLayouts = &device->meta_state.itob.img_ds_layout,
179 .pushConstantRangeCount = 1,
180 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
181 };
182
183 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
184 &pl_create_info,
185 &device->meta_state.alloc,
186 &device->meta_state.itob.img_p_layout);
187 if (result != VK_SUCCESS)
188 goto fail;
189
190 /* compute shader */
191
192 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
193 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
194 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
195 .module = radv_shader_module_to_handle(&cs),
196 .pName = "main",
197 .pSpecializationInfo = NULL,
198 };
199
200 VkComputePipelineCreateInfo vk_pipeline_info = {
201 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
202 .stage = pipeline_shader_stage,
203 .flags = 0,
204 .layout = device->meta_state.itob.img_p_layout,
205 };
206
207 result = radv_CreateComputePipelines(radv_device_to_handle(device),
208 radv_pipeline_cache_to_handle(&device->meta_state.cache),
209 1, &vk_pipeline_info, NULL,
210 &device->meta_state.itob.pipeline);
211 if (result != VK_SUCCESS)
212 goto fail;
213
214 if (device->physical_device->rad_info.chip_class >= GFX9) {
215 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
216 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
217 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .module = radv_shader_module_to_handle(&cs_3d),
219 .pName = "main",
220 .pSpecializationInfo = NULL,
221 };
222
223 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
224 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
225 .stage = pipeline_shader_stage_3d,
226 .flags = 0,
227 .layout = device->meta_state.itob.img_p_layout,
228 };
229
230 result = radv_CreateComputePipelines(radv_device_to_handle(device),
231 radv_pipeline_cache_to_handle(&device->meta_state.cache),
232 1, &vk_pipeline_info_3d, NULL,
233 &device->meta_state.itob.pipeline_3d);
234 if (result != VK_SUCCESS)
235 goto fail;
236 ralloc_free(cs_3d.nir);
237 }
238 ralloc_free(cs.nir);
239
240 return VK_SUCCESS;
241 fail:
242 ralloc_free(cs.nir);
243 ralloc_free(cs_3d.nir);
244 return result;
245 }
246
247 static void
radv_device_finish_meta_itob_state(struct radv_device * device)248 radv_device_finish_meta_itob_state(struct radv_device *device)
249 {
250 struct radv_meta_state *state = &device->meta_state;
251
252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
253 state->itob.img_p_layout, &state->alloc);
254 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255 state->itob.img_ds_layout,
256 &state->alloc);
257 radv_DestroyPipeline(radv_device_to_handle(device),
258 state->itob.pipeline, &state->alloc);
259 if (device->physical_device->rad_info.chip_class >= GFX9)
260 radv_DestroyPipeline(radv_device_to_handle(device),
261 state->itob.pipeline_3d, &state->alloc);
262 }
263
264 static nir_shader *
build_nir_btoi_compute_shader(struct radv_device * dev,bool is_3d)265 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
266 {
267 nir_builder b;
268 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
269 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
270 false,
271 false,
272 GLSL_TYPE_FLOAT);
273 const struct glsl_type *img_type = glsl_image_type(dim,
274 false,
275 GLSL_TYPE_FLOAT);
276 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
277 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
278 b.shader->info.cs.local_size[0] = 16;
279 b.shader->info.cs.local_size[1] = 16;
280 b.shader->info.cs.local_size[2] = 1;
281 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
282 buf_type, "s_tex");
283 input_img->data.descriptor_set = 0;
284 input_img->data.binding = 0;
285
286 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
287 img_type, "out_img");
288 output_img->data.descriptor_set = 0;
289 output_img->data.binding = 1;
290
291 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
292 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
293 nir_ssa_def *block_size = nir_imm_ivec4(&b,
294 b.shader->info.cs.local_size[0],
295 b.shader->info.cs.local_size[1],
296 b.shader->info.cs.local_size[2], 0);
297
298 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
299
300 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
301 nir_intrinsic_set_base(offset, 0);
302 nir_intrinsic_set_range(offset, 16);
303 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
304 offset->num_components = is_3d ? 3 : 2;
305 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
306 nir_builder_instr_insert(&b, &offset->instr);
307
308 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
309 nir_intrinsic_set_base(stride, 0);
310 nir_intrinsic_set_range(stride, 16);
311 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
312 stride->num_components = 1;
313 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
314 nir_builder_instr_insert(&b, &stride->instr);
315
316 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
317 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
318
319 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
320 tmp = nir_iadd(&b, tmp, pos_x);
321
322 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
323
324 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
325 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
326
327 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
328 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
329 tex->op = nir_texop_txf;
330 tex->src[0].src_type = nir_tex_src_coord;
331 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
332 tex->src[1].src_type = nir_tex_src_lod;
333 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
334 tex->src[2].src_type = nir_tex_src_texture_deref;
335 tex->src[2].src = nir_src_for_ssa(input_img_deref);
336 tex->dest_type = nir_type_float;
337 tex->is_array = false;
338 tex->coord_components = 1;
339
340 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
341 nir_builder_instr_insert(&b, &tex->instr);
342
343 nir_ssa_def *outval = &tex->dest.ssa;
344 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
345 store->num_components = 4;
346 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
347 store->src[1] = nir_src_for_ssa(img_coord);
348 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
349 store->src[3] = nir_src_for_ssa(outval);
350 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
351
352 nir_builder_instr_insert(&b, &store->instr);
353 return b.shader;
354 }
355
356 /* Buffer to image - don't write use image accessors */
357 static VkResult
radv_device_init_meta_btoi_state(struct radv_device * device)358 radv_device_init_meta_btoi_state(struct radv_device *device)
359 {
360 VkResult result;
361 struct radv_shader_module cs = { .nir = NULL };
362 struct radv_shader_module cs_3d = { .nir = NULL };
363 cs.nir = build_nir_btoi_compute_shader(device, false);
364 if (device->physical_device->rad_info.chip_class >= GFX9)
365 cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366 /*
367 * two descriptors one for the image being sampled
368 * one for the buffer being written.
369 */
370 VkDescriptorSetLayoutCreateInfo ds_create_info = {
371 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373 .bindingCount = 2,
374 .pBindings = (VkDescriptorSetLayoutBinding[]) {
375 {
376 .binding = 0,
377 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378 .descriptorCount = 1,
379 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380 .pImmutableSamplers = NULL
381 },
382 {
383 .binding = 1,
384 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 .descriptorCount = 1,
386 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387 .pImmutableSamplers = NULL
388 },
389 }
390 };
391
392 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393 &ds_create_info,
394 &device->meta_state.alloc,
395 &device->meta_state.btoi.img_ds_layout);
396 if (result != VK_SUCCESS)
397 goto fail;
398
399
400 VkPipelineLayoutCreateInfo pl_create_info = {
401 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402 .setLayoutCount = 1,
403 .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404 .pushConstantRangeCount = 1,
405 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406 };
407
408 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409 &pl_create_info,
410 &device->meta_state.alloc,
411 &device->meta_state.btoi.img_p_layout);
412 if (result != VK_SUCCESS)
413 goto fail;
414
415 /* compute shader */
416
417 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
420 .module = radv_shader_module_to_handle(&cs),
421 .pName = "main",
422 .pSpecializationInfo = NULL,
423 };
424
425 VkComputePipelineCreateInfo vk_pipeline_info = {
426 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427 .stage = pipeline_shader_stage,
428 .flags = 0,
429 .layout = device->meta_state.btoi.img_p_layout,
430 };
431
432 result = radv_CreateComputePipelines(radv_device_to_handle(device),
433 radv_pipeline_cache_to_handle(&device->meta_state.cache),
434 1, &vk_pipeline_info, NULL,
435 &device->meta_state.btoi.pipeline);
436 if (result != VK_SUCCESS)
437 goto fail;
438
439 if (device->physical_device->rad_info.chip_class >= GFX9) {
440 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
443 .module = radv_shader_module_to_handle(&cs_3d),
444 .pName = "main",
445 .pSpecializationInfo = NULL,
446 };
447
448 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450 .stage = pipeline_shader_stage_3d,
451 .flags = 0,
452 .layout = device->meta_state.btoi.img_p_layout,
453 };
454
455 result = radv_CreateComputePipelines(radv_device_to_handle(device),
456 radv_pipeline_cache_to_handle(&device->meta_state.cache),
457 1, &vk_pipeline_info_3d, NULL,
458 &device->meta_state.btoi.pipeline_3d);
459 ralloc_free(cs_3d.nir);
460 }
461 ralloc_free(cs.nir);
462
463 return VK_SUCCESS;
464 fail:
465 ralloc_free(cs_3d.nir);
466 ralloc_free(cs.nir);
467 return result;
468 }
469
470 static void
radv_device_finish_meta_btoi_state(struct radv_device * device)471 radv_device_finish_meta_btoi_state(struct radv_device *device)
472 {
473 struct radv_meta_state *state = &device->meta_state;
474
475 radv_DestroyPipelineLayout(radv_device_to_handle(device),
476 state->btoi.img_p_layout, &state->alloc);
477 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
478 state->btoi.img_ds_layout,
479 &state->alloc);
480 radv_DestroyPipeline(radv_device_to_handle(device),
481 state->btoi.pipeline, &state->alloc);
482 radv_DestroyPipeline(radv_device_to_handle(device),
483 state->btoi.pipeline_3d, &state->alloc);
484 }
485
486 /* Buffer to image - special path for R32G32B32 */
487 static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device * dev)488 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
489 {
490 nir_builder b;
491 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
492 false,
493 false,
494 GLSL_TYPE_FLOAT);
495 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
496 false,
497 GLSL_TYPE_FLOAT);
498 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
499 b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
500 b.shader->info.cs.local_size[0] = 16;
501 b.shader->info.cs.local_size[1] = 16;
502 b.shader->info.cs.local_size[2] = 1;
503 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
504 buf_type, "s_tex");
505 input_img->data.descriptor_set = 0;
506 input_img->data.binding = 0;
507
508 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
509 img_type, "out_img");
510 output_img->data.descriptor_set = 0;
511 output_img->data.binding = 1;
512
513 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
514 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
515 nir_ssa_def *block_size = nir_imm_ivec4(&b,
516 b.shader->info.cs.local_size[0],
517 b.shader->info.cs.local_size[1],
518 b.shader->info.cs.local_size[2], 0);
519
520 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
521
522 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
523 nir_intrinsic_set_base(offset, 0);
524 nir_intrinsic_set_range(offset, 16);
525 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
526 offset->num_components = 2;
527 nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
528 nir_builder_instr_insert(&b, &offset->instr);
529
530 nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
531 nir_intrinsic_set_base(pitch, 0);
532 nir_intrinsic_set_range(pitch, 16);
533 pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
534 pitch->num_components = 1;
535 nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
536 nir_builder_instr_insert(&b, &pitch->instr);
537
538 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
539 nir_intrinsic_set_base(stride, 0);
540 nir_intrinsic_set_range(stride, 16);
541 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
542 stride->num_components = 1;
543 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
544 nir_builder_instr_insert(&b, &stride->instr);
545
546 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
547 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
548
549 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
550 tmp = nir_iadd(&b, tmp, pos_x);
551
552 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
553
554 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
555
556 nir_ssa_def *global_pos =
557 nir_iadd(&b,
558 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
559 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
560
561 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
562
563 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
564 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
565 tex->op = nir_texop_txf;
566 tex->src[0].src_type = nir_tex_src_coord;
567 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
568 tex->src[1].src_type = nir_tex_src_lod;
569 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
570 tex->src[2].src_type = nir_tex_src_texture_deref;
571 tex->src[2].src = nir_src_for_ssa(input_img_deref);
572 tex->dest_type = nir_type_float;
573 tex->is_array = false;
574 tex->coord_components = 1;
575 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
576 nir_builder_instr_insert(&b, &tex->instr);
577
578 nir_ssa_def *outval = &tex->dest.ssa;
579
580 for (int chan = 0; chan < 3; chan++) {
581 nir_ssa_def *local_pos =
582 nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
583
584 nir_ssa_def *coord =
585 nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
586
587 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
588 store->num_components = 1;
589 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
590 store->src[1] = nir_src_for_ssa(coord);
591 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
592 store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
593 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
594 nir_builder_instr_insert(&b, &store->instr);
595 }
596
597 return b.shader;
598 }
599
600 static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device * device)601 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
602 {
603 VkResult result;
604 struct radv_shader_module cs = { .nir = NULL };
605
606 cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
607
608 VkDescriptorSetLayoutCreateInfo ds_create_info = {
609 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
610 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
611 .bindingCount = 2,
612 .pBindings = (VkDescriptorSetLayoutBinding[]) {
613 {
614 .binding = 0,
615 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
616 .descriptorCount = 1,
617 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
618 .pImmutableSamplers = NULL
619 },
620 {
621 .binding = 1,
622 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
623 .descriptorCount = 1,
624 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
625 .pImmutableSamplers = NULL
626 },
627 }
628 };
629
630 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
631 &ds_create_info,
632 &device->meta_state.alloc,
633 &device->meta_state.btoi_r32g32b32.img_ds_layout);
634 if (result != VK_SUCCESS)
635 goto fail;
636
637
638 VkPipelineLayoutCreateInfo pl_create_info = {
639 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
640 .setLayoutCount = 1,
641 .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
642 .pushConstantRangeCount = 1,
643 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
644 };
645
646 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
647 &pl_create_info,
648 &device->meta_state.alloc,
649 &device->meta_state.btoi_r32g32b32.img_p_layout);
650 if (result != VK_SUCCESS)
651 goto fail;
652
653 /* compute shader */
654
655 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
656 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
657 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
658 .module = radv_shader_module_to_handle(&cs),
659 .pName = "main",
660 .pSpecializationInfo = NULL,
661 };
662
663 VkComputePipelineCreateInfo vk_pipeline_info = {
664 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
665 .stage = pipeline_shader_stage,
666 .flags = 0,
667 .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
668 };
669
670 result = radv_CreateComputePipelines(radv_device_to_handle(device),
671 radv_pipeline_cache_to_handle(&device->meta_state.cache),
672 1, &vk_pipeline_info, NULL,
673 &device->meta_state.btoi_r32g32b32.pipeline);
674
675 fail:
676 ralloc_free(cs.nir);
677 return result;
678 }
679
680 static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device * device)681 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682 {
683 struct radv_meta_state *state = &device->meta_state;
684
685 radv_DestroyPipelineLayout(radv_device_to_handle(device),
686 state->btoi_r32g32b32.img_p_layout, &state->alloc);
687 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688 state->btoi_r32g32b32.img_ds_layout,
689 &state->alloc);
690 radv_DestroyPipeline(radv_device_to_handle(device),
691 state->btoi_r32g32b32.pipeline, &state->alloc);
692 }
693
694 static nir_shader *
build_nir_itoi_compute_shader(struct radv_device * dev,bool is_3d)695 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
696 {
697 nir_builder b;
698 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
699 const struct glsl_type *buf_type = glsl_sampler_type(dim,
700 false,
701 false,
702 GLSL_TYPE_FLOAT);
703 const struct glsl_type *img_type = glsl_image_type(dim,
704 false,
705 GLSL_TYPE_FLOAT);
706 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
707 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
708 b.shader->info.cs.local_size[0] = 16;
709 b.shader->info.cs.local_size[1] = 16;
710 b.shader->info.cs.local_size[2] = 1;
711 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
712 buf_type, "s_tex");
713 input_img->data.descriptor_set = 0;
714 input_img->data.binding = 0;
715
716 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
717 img_type, "out_img");
718 output_img->data.descriptor_set = 0;
719 output_img->data.binding = 1;
720
721 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
722 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
723 nir_ssa_def *block_size = nir_imm_ivec4(&b,
724 b.shader->info.cs.local_size[0],
725 b.shader->info.cs.local_size[1],
726 b.shader->info.cs.local_size[2], 0);
727
728 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
729
730 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
731 nir_intrinsic_set_base(src_offset, 0);
732 nir_intrinsic_set_range(src_offset, 24);
733 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
734 src_offset->num_components = is_3d ? 3 : 2;
735 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
736 nir_builder_instr_insert(&b, &src_offset->instr);
737
738 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
739 nir_intrinsic_set_base(dst_offset, 0);
740 nir_intrinsic_set_range(dst_offset, 24);
741 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
742 dst_offset->num_components = is_3d ? 3 : 2;
743 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
744 nir_builder_instr_insert(&b, &dst_offset->instr);
745
746 nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
747 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
748
749 nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
750
751 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
752 tex->sampler_dim = dim;
753 tex->op = nir_texop_txf;
754 tex->src[0].src_type = nir_tex_src_coord;
755 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
756 tex->src[1].src_type = nir_tex_src_lod;
757 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
758 tex->src[2].src_type = nir_tex_src_texture_deref;
759 tex->src[2].src = nir_src_for_ssa(input_img_deref);
760 tex->dest_type = nir_type_float;
761 tex->is_array = false;
762 tex->coord_components = is_3d ? 3 : 2;
763
764 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
765 nir_builder_instr_insert(&b, &tex->instr);
766
767 nir_ssa_def *outval = &tex->dest.ssa;
768 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
769 store->num_components = 4;
770 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
771 store->src[1] = nir_src_for_ssa(dst_coord);
772 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
773 store->src[3] = nir_src_for_ssa(outval);
774 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
775
776 nir_builder_instr_insert(&b, &store->instr);
777 return b.shader;
778 }
779
780 /* image to image - don't write use image accessors */
781 static VkResult
radv_device_init_meta_itoi_state(struct radv_device * device)782 radv_device_init_meta_itoi_state(struct radv_device *device)
783 {
784 VkResult result;
785 struct radv_shader_module cs = { .nir = NULL };
786 struct radv_shader_module cs_3d = { .nir = NULL };
787 cs.nir = build_nir_itoi_compute_shader(device, false);
788 if (device->physical_device->rad_info.chip_class >= GFX9)
789 cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790 /*
791 * two descriptors one for the image being sampled
792 * one for the buffer being written.
793 */
794 VkDescriptorSetLayoutCreateInfo ds_create_info = {
795 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797 .bindingCount = 2,
798 .pBindings = (VkDescriptorSetLayoutBinding[]) {
799 {
800 .binding = 0,
801 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802 .descriptorCount = 1,
803 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804 .pImmutableSamplers = NULL
805 },
806 {
807 .binding = 1,
808 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809 .descriptorCount = 1,
810 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811 .pImmutableSamplers = NULL
812 },
813 }
814 };
815
816 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817 &ds_create_info,
818 &device->meta_state.alloc,
819 &device->meta_state.itoi.img_ds_layout);
820 if (result != VK_SUCCESS)
821 goto fail;
822
823
824 VkPipelineLayoutCreateInfo pl_create_info = {
825 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826 .setLayoutCount = 1,
827 .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828 .pushConstantRangeCount = 1,
829 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830 };
831
832 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833 &pl_create_info,
834 &device->meta_state.alloc,
835 &device->meta_state.itoi.img_p_layout);
836 if (result != VK_SUCCESS)
837 goto fail;
838
839 /* compute shader */
840
841 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
844 .module = radv_shader_module_to_handle(&cs),
845 .pName = "main",
846 .pSpecializationInfo = NULL,
847 };
848
849 VkComputePipelineCreateInfo vk_pipeline_info = {
850 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851 .stage = pipeline_shader_stage,
852 .flags = 0,
853 .layout = device->meta_state.itoi.img_p_layout,
854 };
855
856 result = radv_CreateComputePipelines(radv_device_to_handle(device),
857 radv_pipeline_cache_to_handle(&device->meta_state.cache),
858 1, &vk_pipeline_info, NULL,
859 &device->meta_state.itoi.pipeline);
860 if (result != VK_SUCCESS)
861 goto fail;
862
863 if (device->physical_device->rad_info.chip_class >= GFX9) {
864 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
867 .module = radv_shader_module_to_handle(&cs_3d),
868 .pName = "main",
869 .pSpecializationInfo = NULL,
870 };
871
872 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874 .stage = pipeline_shader_stage_3d,
875 .flags = 0,
876 .layout = device->meta_state.itoi.img_p_layout,
877 };
878
879 result = radv_CreateComputePipelines(radv_device_to_handle(device),
880 radv_pipeline_cache_to_handle(&device->meta_state.cache),
881 1, &vk_pipeline_info_3d, NULL,
882 &device->meta_state.itoi.pipeline_3d);
883
884 ralloc_free(cs_3d.nir);
885 }
886 ralloc_free(cs.nir);
887
888 return VK_SUCCESS;
889 fail:
890 ralloc_free(cs.nir);
891 ralloc_free(cs_3d.nir);
892 return result;
893 }
894
895 static void
radv_device_finish_meta_itoi_state(struct radv_device * device)896 radv_device_finish_meta_itoi_state(struct radv_device *device)
897 {
898 struct radv_meta_state *state = &device->meta_state;
899
900 radv_DestroyPipelineLayout(radv_device_to_handle(device),
901 state->itoi.img_p_layout, &state->alloc);
902 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903 state->itoi.img_ds_layout,
904 &state->alloc);
905 radv_DestroyPipeline(radv_device_to_handle(device),
906 state->itoi.pipeline, &state->alloc);
907 if (device->physical_device->rad_info.chip_class >= GFX9)
908 radv_DestroyPipeline(radv_device_to_handle(device),
909 state->itoi.pipeline_3d, &state->alloc);
910 }
911
912 static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device * dev)913 build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
914 {
915 nir_builder b;
916 const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
917 false,
918 false,
919 GLSL_TYPE_FLOAT);
920 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
921 false,
922 GLSL_TYPE_FLOAT);
923 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
924 b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
925 b.shader->info.cs.local_size[0] = 16;
926 b.shader->info.cs.local_size[1] = 16;
927 b.shader->info.cs.local_size[2] = 1;
928 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
929 type, "input_img");
930 input_img->data.descriptor_set = 0;
931 input_img->data.binding = 0;
932
933 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
934 img_type, "output_img");
935 output_img->data.descriptor_set = 0;
936 output_img->data.binding = 1;
937
938 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
939 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
940 nir_ssa_def *block_size = nir_imm_ivec4(&b,
941 b.shader->info.cs.local_size[0],
942 b.shader->info.cs.local_size[1],
943 b.shader->info.cs.local_size[2], 0);
944
945 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
946
947 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
948 nir_intrinsic_set_base(src_offset, 0);
949 nir_intrinsic_set_range(src_offset, 24);
950 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
951 src_offset->num_components = 3;
952 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
953 nir_builder_instr_insert(&b, &src_offset->instr);
954
955 nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);
956
957 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
958 nir_intrinsic_set_base(dst_offset, 0);
959 nir_intrinsic_set_range(dst_offset, 24);
960 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
961 dst_offset->num_components = 3;
962 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
963 nir_builder_instr_insert(&b, &dst_offset->instr);
964
965 nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);
966
967 nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
968 nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
969
970 nir_ssa_def *src_global_pos =
971 nir_iadd(&b,
972 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
973 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
974
975 nir_ssa_def *dst_global_pos =
976 nir_iadd(&b,
977 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
978 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
979
980 for (int chan = 0; chan < 3; chan++) {
981 /* src */
982 nir_ssa_def *src_local_pos =
983 nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
984
985 nir_ssa_def *src_coord =
986 nir_vec4(&b, src_local_pos, src_local_pos,
987 src_local_pos, src_local_pos);
988
989 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
990
991 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
992 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
993 tex->op = nir_texop_txf;
994 tex->src[0].src_type = nir_tex_src_coord;
995 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
996 tex->src[1].src_type = nir_tex_src_lod;
997 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
998 tex->src[2].src_type = nir_tex_src_texture_deref;
999 tex->src[2].src = nir_src_for_ssa(input_img_deref);
1000 tex->dest_type = nir_type_float;
1001 tex->is_array = false;
1002 tex->coord_components = 1;
1003 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
1004 nir_builder_instr_insert(&b, &tex->instr);
1005
1006 nir_ssa_def *outval = &tex->dest.ssa;
1007
1008 /* dst */
1009 nir_ssa_def *dst_local_pos =
1010 nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
1011
1012 nir_ssa_def *dst_coord =
1013 nir_vec4(&b, dst_local_pos, dst_local_pos,
1014 dst_local_pos, dst_local_pos);
1015
1016 nir_intrinsic_instr *store =
1017 nir_intrinsic_instr_create(b.shader,
1018 nir_intrinsic_image_deref_store);
1019 store->num_components = 1;
1020 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1021 store->src[1] = nir_src_for_ssa(dst_coord);
1022 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1023 store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
1024 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1025 nir_builder_instr_insert(&b, &store->instr);
1026 }
1027
1028 return b.shader;
1029 }
1030
1031 /* Image to image - special path for R32G32B32 */
1032 static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device * device)1033 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
1034 {
1035 VkResult result;
1036 struct radv_shader_module cs = { .nir = NULL };
1037
1038 cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
1039
1040 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1041 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1042 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1043 .bindingCount = 2,
1044 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1045 {
1046 .binding = 0,
1047 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1048 .descriptorCount = 1,
1049 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1050 .pImmutableSamplers = NULL
1051 },
1052 {
1053 .binding = 1,
1054 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1055 .descriptorCount = 1,
1056 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1057 .pImmutableSamplers = NULL
1058 },
1059 }
1060 };
1061
1062 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1063 &ds_create_info,
1064 &device->meta_state.alloc,
1065 &device->meta_state.itoi_r32g32b32.img_ds_layout);
1066 if (result != VK_SUCCESS)
1067 goto fail;
1068
1069
1070 VkPipelineLayoutCreateInfo pl_create_info = {
1071 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1072 .setLayoutCount = 1,
1073 .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
1074 .pushConstantRangeCount = 1,
1075 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
1076 };
1077
1078 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1079 &pl_create_info,
1080 &device->meta_state.alloc,
1081 &device->meta_state.itoi_r32g32b32.img_p_layout);
1082 if (result != VK_SUCCESS)
1083 goto fail;
1084
1085 /* compute shader */
1086
1087 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1088 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1089 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1090 .module = radv_shader_module_to_handle(&cs),
1091 .pName = "main",
1092 .pSpecializationInfo = NULL,
1093 };
1094
1095 VkComputePipelineCreateInfo vk_pipeline_info = {
1096 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1097 .stage = pipeline_shader_stage,
1098 .flags = 0,
1099 .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
1100 };
1101
1102 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1103 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1104 1, &vk_pipeline_info, NULL,
1105 &device->meta_state.itoi_r32g32b32.pipeline);
1106
1107 fail:
1108 ralloc_free(cs.nir);
1109 return result;
1110 }
1111
1112 static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device * device)1113 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
1114 {
1115 struct radv_meta_state *state = &device->meta_state;
1116
1117 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1118 state->itoi_r32g32b32.img_p_layout, &state->alloc);
1119 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1120 state->itoi_r32g32b32.img_ds_layout,
1121 &state->alloc);
1122 radv_DestroyPipeline(radv_device_to_handle(device),
1123 state->itoi_r32g32b32.pipeline, &state->alloc);
1124 }
1125
1126 static nir_shader *
build_nir_cleari_compute_shader(struct radv_device * dev,bool is_3d)1127 build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
1128 {
1129 nir_builder b;
1130 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
1131 const struct glsl_type *img_type = glsl_image_type(dim,
1132 false,
1133 GLSL_TYPE_FLOAT);
1134 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1135 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
1136 b.shader->info.cs.local_size[0] = 16;
1137 b.shader->info.cs.local_size[1] = 16;
1138 b.shader->info.cs.local_size[2] = 1;
1139
1140 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
1141 img_type, "out_img");
1142 output_img->data.descriptor_set = 0;
1143 output_img->data.binding = 0;
1144
1145 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1146 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
1147 nir_ssa_def *block_size = nir_imm_ivec4(&b,
1148 b.shader->info.cs.local_size[0],
1149 b.shader->info.cs.local_size[1],
1150 b.shader->info.cs.local_size[2], 0);
1151
1152 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1153
1154 nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1155 nir_intrinsic_set_base(clear_val, 0);
1156 nir_intrinsic_set_range(clear_val, 20);
1157 clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1158 clear_val->num_components = 4;
1159 nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
1160 nir_builder_instr_insert(&b, &clear_val->instr);
1161
1162 nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1163 nir_intrinsic_set_base(layer, 0);
1164 nir_intrinsic_set_range(layer, 20);
1165 layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
1166 layer->num_components = 1;
1167 nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
1168 nir_builder_instr_insert(&b, &layer->instr);
1169
1170 nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);
1171
1172 nir_ssa_def *comps[4];
1173 comps[0] = nir_channel(&b, global_id, 0);
1174 comps[1] = nir_channel(&b, global_id, 1);
1175 comps[2] = global_z;
1176 comps[3] = nir_imm_int(&b, 0);
1177 global_id = nir_vec(&b, comps, 4);
1178
1179 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
1180 store->num_components = 4;
1181 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1182 store->src[1] = nir_src_for_ssa(global_id);
1183 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1184 store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);
1185 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1186
1187 nir_builder_instr_insert(&b, &store->instr);
1188 return b.shader;
1189 }
1190
1191 static VkResult
radv_device_init_meta_cleari_state(struct radv_device * device)1192 radv_device_init_meta_cleari_state(struct radv_device *device)
1193 {
1194 VkResult result;
1195 struct radv_shader_module cs = { .nir = NULL };
1196 struct radv_shader_module cs_3d = { .nir = NULL };
1197 cs.nir = build_nir_cleari_compute_shader(device, false);
1198 if (device->physical_device->rad_info.chip_class >= GFX9)
1199 cs_3d.nir = build_nir_cleari_compute_shader(device, true);
1200
1201 /*
1202 * two descriptors one for the image being sampled
1203 * one for the buffer being written.
1204 */
1205 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1206 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1207 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1208 .bindingCount = 1,
1209 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1210 {
1211 .binding = 0,
1212 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1213 .descriptorCount = 1,
1214 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1215 .pImmutableSamplers = NULL
1216 },
1217 }
1218 };
1219
1220 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1221 &ds_create_info,
1222 &device->meta_state.alloc,
1223 &device->meta_state.cleari.img_ds_layout);
1224 if (result != VK_SUCCESS)
1225 goto fail;
1226
1227
1228 VkPipelineLayoutCreateInfo pl_create_info = {
1229 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1230 .setLayoutCount = 1,
1231 .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1232 .pushConstantRangeCount = 1,
1233 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1234 };
1235
1236 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1237 &pl_create_info,
1238 &device->meta_state.alloc,
1239 &device->meta_state.cleari.img_p_layout);
1240 if (result != VK_SUCCESS)
1241 goto fail;
1242
1243 /* compute shader */
1244
1245 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1246 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1247 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1248 .module = radv_shader_module_to_handle(&cs),
1249 .pName = "main",
1250 .pSpecializationInfo = NULL,
1251 };
1252
1253 VkComputePipelineCreateInfo vk_pipeline_info = {
1254 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1255 .stage = pipeline_shader_stage,
1256 .flags = 0,
1257 .layout = device->meta_state.cleari.img_p_layout,
1258 };
1259
1260 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1261 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1262 1, &vk_pipeline_info, NULL,
1263 &device->meta_state.cleari.pipeline);
1264 if (result != VK_SUCCESS)
1265 goto fail;
1266
1267
1268 if (device->physical_device->rad_info.chip_class >= GFX9) {
1269 /* compute shader */
1270 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1271 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1272 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1273 .module = radv_shader_module_to_handle(&cs_3d),
1274 .pName = "main",
1275 .pSpecializationInfo = NULL,
1276 };
1277
1278 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1279 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1280 .stage = pipeline_shader_stage_3d,
1281 .flags = 0,
1282 .layout = device->meta_state.cleari.img_p_layout,
1283 };
1284
1285 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1286 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1287 1, &vk_pipeline_info_3d, NULL,
1288 &device->meta_state.cleari.pipeline_3d);
1289 if (result != VK_SUCCESS)
1290 goto fail;
1291
1292 ralloc_free(cs_3d.nir);
1293 }
1294 ralloc_free(cs.nir);
1295 return VK_SUCCESS;
1296 fail:
1297 ralloc_free(cs.nir);
1298 ralloc_free(cs_3d.nir);
1299 return result;
1300 }
1301
1302 static void
radv_device_finish_meta_cleari_state(struct radv_device * device)1303 radv_device_finish_meta_cleari_state(struct radv_device *device)
1304 {
1305 struct radv_meta_state *state = &device->meta_state;
1306
1307 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1308 state->cleari.img_p_layout, &state->alloc);
1309 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1310 state->cleari.img_ds_layout,
1311 &state->alloc);
1312 radv_DestroyPipeline(radv_device_to_handle(device),
1313 state->cleari.pipeline, &state->alloc);
1314 radv_DestroyPipeline(radv_device_to_handle(device),
1315 state->cleari.pipeline_3d, &state->alloc);
1316 }
1317
1318 /* Special path for clearing R32G32B32 images using a compute shader. */
1319 static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device * dev)1320 build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
1321 {
1322 nir_builder b;
1323 const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF,
1324 false,
1325 GLSL_TYPE_FLOAT);
1326 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
1327 b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
1328 b.shader->info.cs.local_size[0] = 16;
1329 b.shader->info.cs.local_size[1] = 16;
1330 b.shader->info.cs.local_size[2] = 1;
1331
1332 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
1333 img_type, "out_img");
1334 output_img->data.descriptor_set = 0;
1335 output_img->data.binding = 0;
1336
1337 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1338 nir_ssa_def *wg_id = nir_load_work_group_id(&b, 32);
1339 nir_ssa_def *block_size = nir_imm_ivec4(&b,
1340 b.shader->info.cs.local_size[0],
1341 b.shader->info.cs.local_size[1],
1342 b.shader->info.cs.local_size[2], 0);
1343
1344 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1345
1346 nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1347 nir_intrinsic_set_base(clear_val, 0);
1348 nir_intrinsic_set_range(clear_val, 16);
1349 clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
1350 clear_val->num_components = 3;
1351 nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
1352 nir_builder_instr_insert(&b, &clear_val->instr);
1353
1354 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
1355 nir_intrinsic_set_base(stride, 0);
1356 nir_intrinsic_set_range(stride, 16);
1357 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
1358 stride->num_components = 1;
1359 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
1360 nir_builder_instr_insert(&b, &stride->instr);
1361
1362 nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
1363 nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
1364
1365 nir_ssa_def *global_pos =
1366 nir_iadd(&b,
1367 nir_imul(&b, global_y, &stride->dest.ssa),
1368 nir_imul(&b, global_x, nir_imm_int(&b, 3)));
1369
1370 for (unsigned chan = 0; chan < 3; chan++) {
1371 nir_ssa_def *local_pos =
1372 nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
1373
1374 nir_ssa_def *coord =
1375 nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
1376
1377 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
1378 store->num_components = 1;
1379 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
1380 store->src[1] = nir_src_for_ssa(coord);
1381 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
1382 store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
1383 store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
1384 nir_builder_instr_insert(&b, &store->instr);
1385 }
1386
1387 return b.shader;
1388 }
1389
1390 static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device * device)1391 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1392 {
1393 VkResult result;
1394 struct radv_shader_module cs = { .nir = NULL };
1395
1396 cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1397
1398 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1399 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1400 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1401 .bindingCount = 1,
1402 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1403 {
1404 .binding = 0,
1405 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1406 .descriptorCount = 1,
1407 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1408 .pImmutableSamplers = NULL
1409 },
1410 }
1411 };
1412
1413 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1414 &ds_create_info,
1415 &device->meta_state.alloc,
1416 &device->meta_state.cleari_r32g32b32.img_ds_layout);
1417 if (result != VK_SUCCESS)
1418 goto fail;
1419
1420 VkPipelineLayoutCreateInfo pl_create_info = {
1421 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1422 .setLayoutCount = 1,
1423 .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1424 .pushConstantRangeCount = 1,
1425 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1426 };
1427
1428 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1429 &pl_create_info,
1430 &device->meta_state.alloc,
1431 &device->meta_state.cleari_r32g32b32.img_p_layout);
1432 if (result != VK_SUCCESS)
1433 goto fail;
1434
1435 /* compute shader */
1436 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1437 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1438 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1439 .module = radv_shader_module_to_handle(&cs),
1440 .pName = "main",
1441 .pSpecializationInfo = NULL,
1442 };
1443
1444 VkComputePipelineCreateInfo vk_pipeline_info = {
1445 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1446 .stage = pipeline_shader_stage,
1447 .flags = 0,
1448 .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1449 };
1450
1451 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1452 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1453 1, &vk_pipeline_info, NULL,
1454 &device->meta_state.cleari_r32g32b32.pipeline);
1455
1456 fail:
1457 ralloc_free(cs.nir);
1458 return result;
1459 }
1460
1461 static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device * device)1462 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1463 {
1464 struct radv_meta_state *state = &device->meta_state;
1465
1466 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1467 state->cleari_r32g32b32.img_p_layout,
1468 &state->alloc);
1469 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1470 state->cleari_r32g32b32.img_ds_layout,
1471 &state->alloc);
1472 radv_DestroyPipeline(radv_device_to_handle(device),
1473 state->cleari_r32g32b32.pipeline, &state->alloc);
1474 }
1475
1476 void
radv_device_finish_meta_bufimage_state(struct radv_device * device)1477 radv_device_finish_meta_bufimage_state(struct radv_device *device)
1478 {
1479 radv_device_finish_meta_itob_state(device);
1480 radv_device_finish_meta_btoi_state(device);
1481 radv_device_finish_meta_btoi_r32g32b32_state(device);
1482 radv_device_finish_meta_itoi_state(device);
1483 radv_device_finish_meta_itoi_r32g32b32_state(device);
1484 radv_device_finish_meta_cleari_state(device);
1485 radv_device_finish_meta_cleari_r32g32b32_state(device);
1486 }
1487
1488 VkResult
radv_device_init_meta_bufimage_state(struct radv_device * device)1489 radv_device_init_meta_bufimage_state(struct radv_device *device)
1490 {
1491 VkResult result;
1492
1493 result = radv_device_init_meta_itob_state(device);
1494 if (result != VK_SUCCESS)
1495 goto fail_itob;
1496
1497 result = radv_device_init_meta_btoi_state(device);
1498 if (result != VK_SUCCESS)
1499 goto fail_btoi;
1500
1501 result = radv_device_init_meta_btoi_r32g32b32_state(device);
1502 if (result != VK_SUCCESS)
1503 goto fail_btoi_r32g32b32;
1504
1505 result = radv_device_init_meta_itoi_state(device);
1506 if (result != VK_SUCCESS)
1507 goto fail_itoi;
1508
1509 result = radv_device_init_meta_itoi_r32g32b32_state(device);
1510 if (result != VK_SUCCESS)
1511 goto fail_itoi_r32g32b32;
1512
1513 result = radv_device_init_meta_cleari_state(device);
1514 if (result != VK_SUCCESS)
1515 goto fail_cleari;
1516
1517 result = radv_device_init_meta_cleari_r32g32b32_state(device);
1518 if (result != VK_SUCCESS)
1519 goto fail_cleari_r32g32b32;
1520
1521 return VK_SUCCESS;
1522 fail_cleari_r32g32b32:
1523 radv_device_finish_meta_cleari_r32g32b32_state(device);
1524 fail_cleari:
1525 radv_device_finish_meta_cleari_state(device);
1526 fail_itoi_r32g32b32:
1527 radv_device_finish_meta_itoi_r32g32b32_state(device);
1528 fail_itoi:
1529 radv_device_finish_meta_itoi_state(device);
1530 fail_btoi_r32g32b32:
1531 radv_device_finish_meta_btoi_r32g32b32_state(device);
1532 fail_btoi:
1533 radv_device_finish_meta_btoi_state(device);
1534 fail_itob:
1535 radv_device_finish_meta_itob_state(device);
1536 return result;
1537 }
1538
1539 static void
create_iview(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,struct radv_image_view * iview)1540 create_iview(struct radv_cmd_buffer *cmd_buffer,
1541 struct radv_meta_blit2d_surf *surf,
1542 struct radv_image_view *iview)
1543 {
1544 VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1545 radv_meta_get_view_type(surf->image);
1546 radv_image_view_init(iview, cmd_buffer->device,
1547 &(VkImageViewCreateInfo) {
1548 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1549 .image = radv_image_to_handle(surf->image),
1550 .viewType = view_type,
1551 .format = surf->format,
1552 .subresourceRange = {
1553 .aspectMask = surf->aspect_mask,
1554 .baseMipLevel = surf->level,
1555 .levelCount = 1,
1556 .baseArrayLayer = surf->layer,
1557 .layerCount = 1
1558 },
1559 }, NULL);
1560 }
1561
1562 static void
create_bview(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat format,struct radv_buffer_view * bview)1563 create_bview(struct radv_cmd_buffer *cmd_buffer,
1564 struct radv_buffer *buffer,
1565 unsigned offset,
1566 VkFormat format,
1567 struct radv_buffer_view *bview)
1568 {
1569 radv_buffer_view_init(bview, cmd_buffer->device,
1570 &(VkBufferViewCreateInfo) {
1571 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1572 .flags = 0,
1573 .buffer = radv_buffer_to_handle(buffer),
1574 .format = format,
1575 .offset = offset,
1576 .range = VK_WHOLE_SIZE,
1577 });
1578
1579 }
1580
1581 static void
create_buffer_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,VkBufferUsageFlagBits usage,VkBuffer * buffer)1582 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
1583 struct radv_meta_blit2d_surf *surf,
1584 VkBufferUsageFlagBits usage,
1585 VkBuffer *buffer)
1586 {
1587 struct radv_device *device = cmd_buffer->device;
1588 struct radv_device_memory mem = { .bo = surf->image->bo };
1589
1590 radv_CreateBuffer(radv_device_to_handle(device),
1591 &(VkBufferCreateInfo) {
1592 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1593 .flags = 0,
1594 .size = surf->image->size,
1595 .usage = usage,
1596 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1597 }, NULL, buffer);
1598
1599 radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1600 (VkBindBufferMemoryInfo[]) {
1601 {
1602 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1603 .buffer = *buffer,
1604 .memory = radv_device_memory_to_handle(&mem),
1605 .memoryOffset = surf->image->offset,
1606 }
1607 });
1608 }
1609
1610 static void
create_bview_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat src_format,struct radv_buffer_view * bview)1611 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1612 struct radv_buffer *buffer,
1613 unsigned offset,
1614 VkFormat src_format,
1615 struct radv_buffer_view *bview)
1616 {
1617 VkFormat format;
1618
1619 switch (src_format) {
1620 case VK_FORMAT_R32G32B32_UINT:
1621 format = VK_FORMAT_R32_UINT;
1622 break;
1623 case VK_FORMAT_R32G32B32_SINT:
1624 format = VK_FORMAT_R32_SINT;
1625 break;
1626 case VK_FORMAT_R32G32B32_SFLOAT:
1627 format = VK_FORMAT_R32_SFLOAT;
1628 break;
1629 default:
1630 unreachable("invalid R32G32B32 format");
1631 }
1632
1633 radv_buffer_view_init(bview, cmd_buffer->device,
1634 &(VkBufferViewCreateInfo) {
1635 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1636 .flags = 0,
1637 .buffer = radv_buffer_to_handle(buffer),
1638 .format = format,
1639 .offset = offset,
1640 .range = VK_WHOLE_SIZE,
1641 });
1642 }
1643
1644 static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf)1645 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1646 struct radv_meta_blit2d_surf *surf)
1647 {
1648 unsigned stride;
1649
1650 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1651 stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1652 } else {
1653 stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1654 }
1655
1656 return stride;
1657 }
1658
1659 static void
itob_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_buffer_view * dst)1660 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1661 struct radv_image_view *src,
1662 struct radv_buffer_view *dst)
1663 {
1664 struct radv_device *device = cmd_buffer->device;
1665
1666 radv_meta_push_descriptor_set(cmd_buffer,
1667 VK_PIPELINE_BIND_POINT_COMPUTE,
1668 device->meta_state.itob.img_p_layout,
1669 0, /* set */
1670 2, /* descriptorWriteCount */
1671 (VkWriteDescriptorSet[]) {
1672 {
1673 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1674 .dstBinding = 0,
1675 .dstArrayElement = 0,
1676 .descriptorCount = 1,
1677 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1678 .pImageInfo = (VkDescriptorImageInfo[]) {
1679 {
1680 .sampler = VK_NULL_HANDLE,
1681 .imageView = radv_image_view_to_handle(src),
1682 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1683 },
1684 }
1685 },
1686 {
1687 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1688 .dstBinding = 1,
1689 .dstArrayElement = 0,
1690 .descriptorCount = 1,
1691 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1692 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1693 }
1694 });
1695 }
1696
1697 void
radv_meta_image_to_buffer(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_buffer * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1698 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1699 struct radv_meta_blit2d_surf *src,
1700 struct radv_meta_blit2d_buffer *dst,
1701 unsigned num_rects,
1702 struct radv_meta_blit2d_rect *rects)
1703 {
1704 VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1705 struct radv_device *device = cmd_buffer->device;
1706 struct radv_image_view src_view;
1707 struct radv_buffer_view dst_view;
1708
1709 create_iview(cmd_buffer, src, &src_view);
1710 create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1711 itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1712
1713 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1714 src->image->type == VK_IMAGE_TYPE_3D)
1715 pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1716
1717 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1718 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1719
1720 for (unsigned r = 0; r < num_rects; ++r) {
1721 unsigned push_constants[4] = {
1722 rects[r].src_x,
1723 rects[r].src_y,
1724 src->layer,
1725 dst->pitch
1726 };
1727 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1728 device->meta_state.itob.img_p_layout,
1729 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1730 push_constants);
1731
1732 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1733 }
1734 }
1735
1736 static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1737 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1738 struct radv_buffer_view *src,
1739 struct radv_buffer_view *dst)
1740 {
1741 struct radv_device *device = cmd_buffer->device;
1742
1743 radv_meta_push_descriptor_set(cmd_buffer,
1744 VK_PIPELINE_BIND_POINT_COMPUTE,
1745 device->meta_state.btoi_r32g32b32.img_p_layout,
1746 0, /* set */
1747 2, /* descriptorWriteCount */
1748 (VkWriteDescriptorSet[]) {
1749 {
1750 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1751 .dstBinding = 0,
1752 .dstArrayElement = 0,
1753 .descriptorCount = 1,
1754 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1755 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1756 },
1757 {
1758 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1759 .dstBinding = 1,
1760 .dstArrayElement = 0,
1761 .descriptorCount = 1,
1762 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1763 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1764 }
1765 });
1766 }
1767
1768 static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1769 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1770 struct radv_meta_blit2d_buffer *src,
1771 struct radv_meta_blit2d_surf *dst,
1772 unsigned num_rects,
1773 struct radv_meta_blit2d_rect *rects)
1774 {
1775 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1776 struct radv_device *device = cmd_buffer->device;
1777 struct radv_buffer_view src_view, dst_view;
1778 unsigned dst_offset = 0;
1779 unsigned stride;
1780 VkBuffer buffer;
1781
1782 /* This special btoi path for R32G32B32 formats will write the linear
1783 * image as a buffer with the same underlying memory. The compute
1784 * shader will copy all components separately using a R32 format.
1785 */
1786 create_buffer_from_image(cmd_buffer, dst,
1787 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1788 &buffer);
1789
1790 create_bview(cmd_buffer, src->buffer, src->offset,
1791 src->format, &src_view);
1792 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
1793 dst_offset, dst->format, &dst_view);
1794 btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1795
1796 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1797 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1798
1799 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1800
1801 for (unsigned r = 0; r < num_rects; ++r) {
1802 unsigned push_constants[4] = {
1803 rects[r].dst_x,
1804 rects[r].dst_y,
1805 stride,
1806 src->pitch,
1807 };
1808
1809 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1810 device->meta_state.btoi_r32g32b32.img_p_layout,
1811 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1812 push_constants);
1813
1814 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1815 }
1816
1817 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1818 }
1819
1820 static void
btoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_image_view * dst)1821 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1822 struct radv_buffer_view *src,
1823 struct radv_image_view *dst)
1824 {
1825 struct radv_device *device = cmd_buffer->device;
1826
1827 radv_meta_push_descriptor_set(cmd_buffer,
1828 VK_PIPELINE_BIND_POINT_COMPUTE,
1829 device->meta_state.btoi.img_p_layout,
1830 0, /* set */
1831 2, /* descriptorWriteCount */
1832 (VkWriteDescriptorSet[]) {
1833 {
1834 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1835 .dstBinding = 0,
1836 .dstArrayElement = 0,
1837 .descriptorCount = 1,
1838 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1839 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1840 },
1841 {
1842 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1843 .dstBinding = 1,
1844 .dstArrayElement = 0,
1845 .descriptorCount = 1,
1846 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1847 .pImageInfo = (VkDescriptorImageInfo[]) {
1848 {
1849 .sampler = VK_NULL_HANDLE,
1850 .imageView = radv_image_view_to_handle(dst),
1851 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1852 },
1853 }
1854 }
1855 });
1856 }
1857
1858 void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1859 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1860 struct radv_meta_blit2d_buffer *src,
1861 struct radv_meta_blit2d_surf *dst,
1862 unsigned num_rects,
1863 struct radv_meta_blit2d_rect *rects)
1864 {
1865 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1866 struct radv_device *device = cmd_buffer->device;
1867 struct radv_buffer_view src_view;
1868 struct radv_image_view dst_view;
1869
1870 if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1871 dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1872 dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1873 radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1874 num_rects, rects);
1875 return;
1876 }
1877
1878 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1879 create_iview(cmd_buffer, dst, &dst_view);
1880 btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1881
1882 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1883 dst->image->type == VK_IMAGE_TYPE_3D)
1884 pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1885 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1886 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1887
1888 for (unsigned r = 0; r < num_rects; ++r) {
1889 unsigned push_constants[4] = {
1890 rects[r].dst_x,
1891 rects[r].dst_y,
1892 dst->layer,
1893 src->pitch,
1894 };
1895 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1896 device->meta_state.btoi.img_p_layout,
1897 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1898 push_constants);
1899
1900 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1901 }
1902 }
1903
1904 static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1905 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1906 struct radv_buffer_view *src,
1907 struct radv_buffer_view *dst)
1908 {
1909 struct radv_device *device = cmd_buffer->device;
1910
1911 radv_meta_push_descriptor_set(cmd_buffer,
1912 VK_PIPELINE_BIND_POINT_COMPUTE,
1913 device->meta_state.itoi_r32g32b32.img_p_layout,
1914 0, /* set */
1915 2, /* descriptorWriteCount */
1916 (VkWriteDescriptorSet[]) {
1917 {
1918 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1919 .dstBinding = 0,
1920 .dstArrayElement = 0,
1921 .descriptorCount = 1,
1922 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1923 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1924 },
1925 {
1926 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1927 .dstBinding = 1,
1928 .dstArrayElement = 0,
1929 .descriptorCount = 1,
1930 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1931 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1932 }
1933 });
1934 }
1935
1936 static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1937 radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1938 struct radv_meta_blit2d_surf *src,
1939 struct radv_meta_blit2d_surf *dst,
1940 unsigned num_rects,
1941 struct radv_meta_blit2d_rect *rects)
1942 {
1943 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1944 struct radv_device *device = cmd_buffer->device;
1945 struct radv_buffer_view src_view, dst_view;
1946 unsigned src_offset = 0, dst_offset = 0;
1947 unsigned src_stride, dst_stride;
1948 VkBuffer src_buffer, dst_buffer;
1949
1950 /* 96-bit formats are only compatible to themselves. */
1951 assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
1952 dst->format == VK_FORMAT_R32G32B32_SINT ||
1953 dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1954
1955 /* This special itoi path for R32G32B32 formats will write the linear
1956 * image as a buffer with the same underlying memory. The compute
1957 * shader will copy all components separately using a R32 format.
1958 */
1959 create_buffer_from_image(cmd_buffer, src,
1960 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1961 &src_buffer);
1962 create_buffer_from_image(cmd_buffer, dst,
1963 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1964 &dst_buffer);
1965
1966 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
1967 src_offset, src->format, &src_view);
1968 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
1969 dst_offset, dst->format, &dst_view);
1970 itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1971
1972 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1973 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1974
1975 src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1976 dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1977
1978 for (unsigned r = 0; r < num_rects; ++r) {
1979 unsigned push_constants[6] = {
1980 rects[r].src_x,
1981 rects[r].src_y,
1982 src_stride,
1983 rects[r].dst_x,
1984 rects[r].dst_y,
1985 dst_stride,
1986 };
1987 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1988 device->meta_state.itoi_r32g32b32.img_p_layout,
1989 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
1990 push_constants);
1991
1992 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1993 }
1994
1995 radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1996 radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1997 }
1998
1999 static void
itoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_image_view * dst)2000 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2001 struct radv_image_view *src,
2002 struct radv_image_view *dst)
2003 {
2004 struct radv_device *device = cmd_buffer->device;
2005
2006 radv_meta_push_descriptor_set(cmd_buffer,
2007 VK_PIPELINE_BIND_POINT_COMPUTE,
2008 device->meta_state.itoi.img_p_layout,
2009 0, /* set */
2010 2, /* descriptorWriteCount */
2011 (VkWriteDescriptorSet[]) {
2012 {
2013 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2014 .dstBinding = 0,
2015 .dstArrayElement = 0,
2016 .descriptorCount = 1,
2017 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
2018 .pImageInfo = (VkDescriptorImageInfo[]) {
2019 {
2020 .sampler = VK_NULL_HANDLE,
2021 .imageView = radv_image_view_to_handle(src),
2022 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2023 },
2024 }
2025 },
2026 {
2027 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2028 .dstBinding = 1,
2029 .dstArrayElement = 0,
2030 .descriptorCount = 1,
2031 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2032 .pImageInfo = (VkDescriptorImageInfo[]) {
2033 {
2034 .sampler = VK_NULL_HANDLE,
2035 .imageView = radv_image_view_to_handle(dst),
2036 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2037 },
2038 }
2039 }
2040 });
2041 }
2042
2043 void
radv_meta_image_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)2044 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
2045 struct radv_meta_blit2d_surf *src,
2046 struct radv_meta_blit2d_surf *dst,
2047 unsigned num_rects,
2048 struct radv_meta_blit2d_rect *rects)
2049 {
2050 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
2051 struct radv_device *device = cmd_buffer->device;
2052 struct radv_image_view src_view, dst_view;
2053
2054 if (src->format == VK_FORMAT_R32G32B32_UINT ||
2055 src->format == VK_FORMAT_R32G32B32_SINT ||
2056 src->format == VK_FORMAT_R32G32B32_SFLOAT) {
2057 radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
2058 num_rects, rects);
2059 return;
2060 }
2061
2062 create_iview(cmd_buffer, src, &src_view);
2063 create_iview(cmd_buffer, dst, &dst_view);
2064
2065 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
2066
2067 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2068 (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
2069 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
2070 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2071 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2072
2073 for (unsigned r = 0; r < num_rects; ++r) {
2074 unsigned push_constants[6] = {
2075 rects[r].src_x,
2076 rects[r].src_y,
2077 src->layer,
2078 rects[r].dst_x,
2079 rects[r].dst_y,
2080 dst->layer,
2081 };
2082 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2083 device->meta_state.itoi.img_p_layout,
2084 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
2085 push_constants);
2086
2087 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
2088 }
2089 }
2090
2091 static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * view)2092 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2093 struct radv_buffer_view *view)
2094 {
2095 struct radv_device *device = cmd_buffer->device;
2096
2097 radv_meta_push_descriptor_set(cmd_buffer,
2098 VK_PIPELINE_BIND_POINT_COMPUTE,
2099 device->meta_state.cleari_r32g32b32.img_p_layout,
2100 0, /* set */
2101 1, /* descriptorWriteCount */
2102 (VkWriteDescriptorSet[]) {
2103 {
2104 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2105 .dstBinding = 0,
2106 .dstArrayElement = 0,
2107 .descriptorCount = 1,
2108 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
2109 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
2110 }
2111 });
2112 }
2113
2114 static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)2115 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
2116 struct radv_meta_blit2d_surf *dst,
2117 const VkClearColorValue *clear_color)
2118 {
2119 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
2120 struct radv_device *device = cmd_buffer->device;
2121 struct radv_buffer_view dst_view;
2122 unsigned stride;
2123 VkBuffer buffer;
2124
2125 /* This special clear path for R32G32B32 formats will write the linear
2126 * image as a buffer with the same underlying memory. The compute
2127 * shader will clear all components separately using a R32 format.
2128 */
2129 create_buffer_from_image(cmd_buffer, dst,
2130 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
2131 &buffer);
2132
2133 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
2134 0, dst->format, &dst_view);
2135 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
2136
2137 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2138 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2139
2140 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
2141
2142 unsigned push_constants[4] = {
2143 clear_color->uint32[0],
2144 clear_color->uint32[1],
2145 clear_color->uint32[2],
2146 stride,
2147 };
2148
2149 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2150 device->meta_state.cleari_r32g32b32.img_p_layout,
2151 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
2152 push_constants);
2153
2154 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
2155 dst->image->info.height, 1);
2156
2157 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
2158 }
2159
2160 static void
cleari_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * dst_iview)2161 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2162 struct radv_image_view *dst_iview)
2163 {
2164 struct radv_device *device = cmd_buffer->device;
2165
2166 radv_meta_push_descriptor_set(cmd_buffer,
2167 VK_PIPELINE_BIND_POINT_COMPUTE,
2168 device->meta_state.cleari.img_p_layout,
2169 0, /* set */
2170 1, /* descriptorWriteCount */
2171 (VkWriteDescriptorSet[]) {
2172 {
2173 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2174 .dstBinding = 0,
2175 .dstArrayElement = 0,
2176 .descriptorCount = 1,
2177 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2178 .pImageInfo = (VkDescriptorImageInfo[]) {
2179 {
2180 .sampler = VK_NULL_HANDLE,
2181 .imageView = radv_image_view_to_handle(dst_iview),
2182 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2183 },
2184 }
2185 },
2186 });
2187 }
2188
2189 void
radv_meta_clear_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)2190 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
2191 struct radv_meta_blit2d_surf *dst,
2192 const VkClearColorValue *clear_color)
2193 {
2194 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
2195 struct radv_device *device = cmd_buffer->device;
2196 struct radv_image_view dst_iview;
2197
2198 if (dst->format == VK_FORMAT_R32G32B32_UINT ||
2199 dst->format == VK_FORMAT_R32G32B32_SINT ||
2200 dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
2201 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
2202 return;
2203 }
2204
2205 create_iview(cmd_buffer, dst, &dst_iview);
2206 cleari_bind_descriptors(cmd_buffer, &dst_iview);
2207
2208 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2209 dst->image->type == VK_IMAGE_TYPE_3D)
2210 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
2211
2212 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2213 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2214
2215 unsigned push_constants[5] = {
2216 clear_color->uint32[0],
2217 clear_color->uint32[1],
2218 clear_color->uint32[2],
2219 clear_color->uint32[3],
2220 dst->layer,
2221 };
2222
2223 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2224 device->meta_state.cleari.img_p_layout,
2225 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
2226 push_constants);
2227
2228 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
2229 }
2230