• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2016 Red Hat.
3  * Copyright © 2016 Bas Nieuwenhuizen
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22  * IN THE SOFTWARE.
23  */
24 #include "nir/nir_builder.h"
25 #include "radv_meta.h"
26 
27 /*
28  * GFX queue: Compute shader implementation of image->buffer copy
29  * Compute queue: implementation also of buffer->image, image->image, and image clear.
30  */
31 
32 /* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33  * for that.
34  */
35 static nir_shader *
build_nir_itob_compute_shader(struct radv_device * dev,bool is_3d)36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
37 {
38    enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
39    const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
40    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
41    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
42                                                   is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
43    b.shader->info.workgroup_size[0] = 8;
44    b.shader->info.workgroup_size[1] = 8;
45    b.shader->info.workgroup_size[2] = 1;
46    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
47    input_img->data.descriptor_set = 0;
48    input_img->data.binding = 0;
49 
50    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
51    output_img->data.descriptor_set = 0;
52    output_img->data.binding = 1;
53 
54    nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
55 
56    nir_ssa_def *offset =
57       nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
58    nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
59 
60    nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
61    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
62 
63    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
64    tex->sampler_dim = dim;
65    tex->op = nir_texop_txf;
66    tex->src[0].src_type = nir_tex_src_coord;
67    tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
68    tex->src[1].src_type = nir_tex_src_lod;
69    tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
70    tex->src[2].src_type = nir_tex_src_texture_deref;
71    tex->src[2].src = nir_src_for_ssa(input_img_deref);
72    tex->dest_type = nir_type_float32;
73    tex->is_array = false;
74    tex->coord_components = is_3d ? 3 : 2;
75 
76    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
77    nir_builder_instr_insert(&b, &tex->instr);
78 
79    nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
80    nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
81 
82    nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
83    tmp = nir_iadd(&b, tmp, pos_x);
84 
85    nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
86 
87    nir_ssa_def *outval = &tex->dest.ssa;
88    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
89                          nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0),
90                          .image_dim = GLSL_SAMPLER_DIM_BUF);
91 
92    return b.shader;
93 }
94 
95 /* Image to buffer - don't write use image accessors */
96 static VkResult
radv_device_init_meta_itob_state(struct radv_device * device)97 radv_device_init_meta_itob_state(struct radv_device *device)
98 {
99    VkResult result;
100    nir_shader *cs = build_nir_itob_compute_shader(device, false);
101    nir_shader *cs_3d = NULL;
102 
103    if (device->physical_device->rad_info.chip_class >= GFX9)
104       cs_3d = build_nir_itob_compute_shader(device, true);
105 
106    /*
107     * two descriptors one for the image being sampled
108     * one for the buffer being written.
109     */
110    VkDescriptorSetLayoutCreateInfo ds_create_info = {
111       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
112       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
113       .bindingCount = 2,
114       .pBindings = (VkDescriptorSetLayoutBinding[]){
115          {.binding = 0,
116           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
117           .descriptorCount = 1,
118           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
119           .pImmutableSamplers = NULL},
120          {.binding = 1,
121           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
122           .descriptorCount = 1,
123           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
124           .pImmutableSamplers = NULL},
125       }};
126 
127    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
128                                            &device->meta_state.alloc,
129                                            &device->meta_state.itob.img_ds_layout);
130    if (result != VK_SUCCESS)
131       goto fail;
132 
133    VkPipelineLayoutCreateInfo pl_create_info = {
134       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
135       .setLayoutCount = 1,
136       .pSetLayouts = &device->meta_state.itob.img_ds_layout,
137       .pushConstantRangeCount = 1,
138       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
139    };
140 
141    result =
142       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
143                                 &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
144    if (result != VK_SUCCESS)
145       goto fail;
146 
147    /* compute shader */
148 
149    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
150       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
151       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
152       .module = vk_shader_module_handle_from_nir(cs),
153       .pName = "main",
154       .pSpecializationInfo = NULL,
155    };
156 
157    VkComputePipelineCreateInfo vk_pipeline_info = {
158       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
159       .stage = pipeline_shader_stage,
160       .flags = 0,
161       .layout = device->meta_state.itob.img_p_layout,
162    };
163 
164    result = radv_CreateComputePipelines(radv_device_to_handle(device),
165                                         radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
166                                         &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
167    if (result != VK_SUCCESS)
168       goto fail;
169 
170    if (device->physical_device->rad_info.chip_class >= GFX9) {
171       VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
172          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
173          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
174          .module = vk_shader_module_handle_from_nir(cs_3d),
175          .pName = "main",
176          .pSpecializationInfo = NULL,
177       };
178 
179       VkComputePipelineCreateInfo vk_pipeline_info_3d = {
180          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
181          .stage = pipeline_shader_stage_3d,
182          .flags = 0,
183          .layout = device->meta_state.itob.img_p_layout,
184       };
185 
186       result = radv_CreateComputePipelines(
187          radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
188          &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d);
189       if (result != VK_SUCCESS)
190          goto fail;
191       ralloc_free(cs_3d);
192    }
193    ralloc_free(cs);
194 
195    return VK_SUCCESS;
196 fail:
197    ralloc_free(cs);
198    ralloc_free(cs_3d);
199    return result;
200 }
201 
202 static void
radv_device_finish_meta_itob_state(struct radv_device * device)203 radv_device_finish_meta_itob_state(struct radv_device *device)
204 {
205    struct radv_meta_state *state = &device->meta_state;
206 
207    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
208                               &state->alloc);
209    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
210                                    &state->alloc);
211    radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
212    if (device->physical_device->rad_info.chip_class >= GFX9)
213       radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
214 }
215 
216 static nir_shader *
build_nir_btoi_compute_shader(struct radv_device * dev,bool is_3d)217 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
218 {
219    enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
220    const struct glsl_type *buf_type =
221       glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
222    const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
223    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
224                                                   is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
225    b.shader->info.workgroup_size[0] = 8;
226    b.shader->info.workgroup_size[1] = 8;
227    b.shader->info.workgroup_size[2] = 1;
228    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
229    input_img->data.descriptor_set = 0;
230    input_img->data.binding = 0;
231 
232    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
233    output_img->data.descriptor_set = 0;
234    output_img->data.binding = 1;
235 
236    nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
237 
238    nir_ssa_def *offset =
239       nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
240    nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
241 
242    nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
243    nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
244 
245    nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
246    buf_coord = nir_iadd(&b, buf_coord, pos_x);
247 
248    nir_ssa_def *coord = nir_iadd(&b, global_id, offset);
249    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
250 
251    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
252    tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
253    tex->op = nir_texop_txf;
254    tex->src[0].src_type = nir_tex_src_coord;
255    tex->src[0].src = nir_src_for_ssa(buf_coord);
256    tex->src[1].src_type = nir_tex_src_lod;
257    tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
258    tex->src[2].src_type = nir_tex_src_texture_deref;
259    tex->src[2].src = nir_src_for_ssa(input_img_deref);
260    tex->dest_type = nir_type_float32;
261    tex->is_array = false;
262    tex->coord_components = 1;
263 
264    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
265    nir_builder_instr_insert(&b, &tex->instr);
266 
267    nir_ssa_def *outval = &tex->dest.ssa;
268 
269    nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, coord, 0),
270                                          nir_channel(&b, coord, 1),
271                                          is_3d ? nir_channel(&b, coord, 2) : nir_ssa_undef(&b, 1, 32),
272                                          nir_ssa_undef(&b, 1, 32));
273 
274    nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
275                          nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0), .image_dim = dim);
276 
277    return b.shader;
278 }
279 
280 /* Buffer to image - don't write use image accessors */
281 static VkResult
radv_device_init_meta_btoi_state(struct radv_device * device)282 radv_device_init_meta_btoi_state(struct radv_device *device)
283 {
284    VkResult result;
285    nir_shader *cs = build_nir_btoi_compute_shader(device, false);
286    nir_shader *cs_3d = NULL;
287    if (device->physical_device->rad_info.chip_class >= GFX9)
288       cs_3d = build_nir_btoi_compute_shader(device, true);
289    /*
290     * two descriptors one for the image being sampled
291     * one for the buffer being written.
292     */
293    VkDescriptorSetLayoutCreateInfo ds_create_info = {
294       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
295       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
296       .bindingCount = 2,
297       .pBindings = (VkDescriptorSetLayoutBinding[]){
298          {.binding = 0,
299           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
300           .descriptorCount = 1,
301           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
302           .pImmutableSamplers = NULL},
303          {.binding = 1,
304           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
305           .descriptorCount = 1,
306           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
307           .pImmutableSamplers = NULL},
308       }};
309 
310    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
311                                            &device->meta_state.alloc,
312                                            &device->meta_state.btoi.img_ds_layout);
313    if (result != VK_SUCCESS)
314       goto fail;
315 
316    VkPipelineLayoutCreateInfo pl_create_info = {
317       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
318       .setLayoutCount = 1,
319       .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
320       .pushConstantRangeCount = 1,
321       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
322    };
323 
324    result =
325       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
326                                 &device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
327    if (result != VK_SUCCESS)
328       goto fail;
329 
330    /* compute shader */
331 
332    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
333       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
334       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
335       .module = vk_shader_module_handle_from_nir(cs),
336       .pName = "main",
337       .pSpecializationInfo = NULL,
338    };
339 
340    VkComputePipelineCreateInfo vk_pipeline_info = {
341       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
342       .stage = pipeline_shader_stage,
343       .flags = 0,
344       .layout = device->meta_state.btoi.img_p_layout,
345    };
346 
347    result = radv_CreateComputePipelines(radv_device_to_handle(device),
348                                         radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
349                                         &vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
350    if (result != VK_SUCCESS)
351       goto fail;
352 
353    if (device->physical_device->rad_info.chip_class >= GFX9) {
354       VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
355          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
356          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
357          .module = vk_shader_module_handle_from_nir(cs_3d),
358          .pName = "main",
359          .pSpecializationInfo = NULL,
360       };
361 
362       VkComputePipelineCreateInfo vk_pipeline_info_3d = {
363          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
364          .stage = pipeline_shader_stage_3d,
365          .flags = 0,
366          .layout = device->meta_state.btoi.img_p_layout,
367       };
368 
369       result = radv_CreateComputePipelines(
370          radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
371          &vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d);
372       ralloc_free(cs_3d);
373    }
374    ralloc_free(cs);
375 
376    return VK_SUCCESS;
377 fail:
378    ralloc_free(cs_3d);
379    ralloc_free(cs);
380    return result;
381 }
382 
383 static void
radv_device_finish_meta_btoi_state(struct radv_device * device)384 radv_device_finish_meta_btoi_state(struct radv_device *device)
385 {
386    struct radv_meta_state *state = &device->meta_state;
387 
388    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
389                               &state->alloc);
390    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
391                                    &state->alloc);
392    radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
393    radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
394 }
395 
396 /* Buffer to image - special path for R32G32B32 */
397 static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device * dev)398 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
399 {
400    const struct glsl_type *buf_type =
401       glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
402    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
403    nir_builder b =
404       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
405    b.shader->info.workgroup_size[0] = 8;
406    b.shader->info.workgroup_size[1] = 8;
407    b.shader->info.workgroup_size[2] = 1;
408    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
409    input_img->data.descriptor_set = 0;
410    input_img->data.binding = 0;
411 
412    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
413    output_img->data.descriptor_set = 0;
414    output_img->data.binding = 1;
415 
416    nir_ssa_def *global_id = get_global_ids(&b, 2);
417 
418    nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
419    nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
420    nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
421 
422    nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
423    nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
424 
425    nir_ssa_def *buf_coord = nir_imul(&b, pos_y, stride);
426    buf_coord = nir_iadd(&b, buf_coord, pos_x);
427 
428    nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
429 
430    nir_ssa_def *global_pos =
431       nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
432                nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
433 
434    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
435 
436    nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
437    tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
438    tex->op = nir_texop_txf;
439    tex->src[0].src_type = nir_tex_src_coord;
440    tex->src[0].src = nir_src_for_ssa(buf_coord);
441    tex->src[1].src_type = nir_tex_src_lod;
442    tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
443    tex->src[2].src_type = nir_tex_src_texture_deref;
444    tex->src[2].src = nir_src_for_ssa(input_img_deref);
445    tex->dest_type = nir_type_float32;
446    tex->is_array = false;
447    tex->coord_components = 1;
448    nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
449    nir_builder_instr_insert(&b, &tex->instr);
450 
451    nir_ssa_def *outval = &tex->dest.ssa;
452 
453    for (int chan = 0; chan < 3; chan++) {
454       nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
455 
456       nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
457 
458       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
459                             nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
460                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
461    }
462 
463    return b.shader;
464 }
465 
466 static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device * device)467 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
468 {
469    VkResult result;
470    nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);
471 
472    VkDescriptorSetLayoutCreateInfo ds_create_info = {
473       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
474       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
475       .bindingCount = 2,
476       .pBindings = (VkDescriptorSetLayoutBinding[]){
477          {.binding = 0,
478           .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
479           .descriptorCount = 1,
480           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
481           .pImmutableSamplers = NULL},
482          {.binding = 1,
483           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
484           .descriptorCount = 1,
485           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
486           .pImmutableSamplers = NULL},
487       }};
488 
489    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
490                                            &device->meta_state.alloc,
491                                            &device->meta_state.btoi_r32g32b32.img_ds_layout);
492    if (result != VK_SUCCESS)
493       goto fail;
494 
495    VkPipelineLayoutCreateInfo pl_create_info = {
496       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
497       .setLayoutCount = 1,
498       .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
499       .pushConstantRangeCount = 1,
500       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
501    };
502 
503    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
504                                       &device->meta_state.alloc,
505                                       &device->meta_state.btoi_r32g32b32.img_p_layout);
506    if (result != VK_SUCCESS)
507       goto fail;
508 
509    /* compute shader */
510 
511    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
512       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
513       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
514       .module = vk_shader_module_handle_from_nir(cs),
515       .pName = "main",
516       .pSpecializationInfo = NULL,
517    };
518 
519    VkComputePipelineCreateInfo vk_pipeline_info = {
520       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
521       .stage = pipeline_shader_stage,
522       .flags = 0,
523       .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
524    };
525 
526    result = radv_CreateComputePipelines(
527       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
528       &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline);
529 
530 fail:
531    ralloc_free(cs);
532    return result;
533 }
534 
535 static void
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device * device)536 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
537 {
538    struct radv_meta_state *state = &device->meta_state;
539 
540    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
541                               &state->alloc);
542    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
543                                    state->btoi_r32g32b32.img_ds_layout, &state->alloc);
544    radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
545                         &state->alloc);
546 }
547 
548 static nir_shader *
build_nir_itoi_compute_shader(struct radv_device * dev,bool is_3d,int samples)549 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
550 {
551    bool is_multisampled = samples > 1;
552    enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
553                                : is_multisampled ? GLSL_SAMPLER_DIM_MS
554                                                  : GLSL_SAMPLER_DIM_2D;
555    const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
556    const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
557    nir_builder b = nir_builder_init_simple_shader(
558       MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
559    b.shader->info.workgroup_size[0] = 8;
560    b.shader->info.workgroup_size[1] = 8;
561    b.shader->info.workgroup_size[2] = 1;
562    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
563    input_img->data.descriptor_set = 0;
564    input_img->data.binding = 0;
565 
566    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
567    output_img->data.descriptor_set = 0;
568    output_img->data.binding = 1;
569 
570    nir_ssa_def *global_id = get_global_ids(&b, is_3d ? 3 : 2);
571 
572    nir_ssa_def *src_offset =
573       nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24);
574    nir_ssa_def *dst_offset =
575       nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24);
576 
577    nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
578    nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
579 
580    nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);
581 
582    nir_tex_instr *tex_instr[8];
583    for (uint32_t i = 0; i < samples; i++) {
584       tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);
585 
586       nir_tex_instr *tex = tex_instr[i];
587       tex->sampler_dim = dim;
588       tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
589       tex->src[0].src_type = nir_tex_src_coord;
590       tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
591       tex->src[1].src_type = nir_tex_src_lod;
592       tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
593       tex->src[2].src_type = nir_tex_src_texture_deref;
594       tex->src[2].src = nir_src_for_ssa(input_img_deref);
595       if (is_multisampled) {
596          tex->src[3].src_type = nir_tex_src_ms_index;
597          tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
598       }
599       tex->dest_type = nir_type_float32;
600       tex->is_array = false;
601       tex->coord_components = is_3d ? 3 : 2;
602 
603       nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
604       nir_builder_instr_insert(&b, &tex->instr);
605    }
606 
607    nir_ssa_def *img_coord = nir_vec4(&b, nir_channel(&b, dst_coord, 0),
608                                          nir_channel(&b, dst_coord, 1),
609                                          is_3d ? nir_channel(&b, dst_coord, 2) : nir_ssa_undef(&b, 1, 32),
610                                          nir_ssa_undef(&b, 1, 32));
611 
612    for (uint32_t i = 0; i < samples; i++) {
613       nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
614       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
615                             nir_imm_int(&b, i), outval, nir_imm_int(&b, 0), .image_dim = dim);
616    }
617 
618    return b.shader;
619 }
620 
621 static VkResult
create_itoi_pipeline(struct radv_device * device,int samples,VkPipeline * pipeline)622 create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
623 {
624    struct radv_meta_state *state = &device->meta_state;
625    nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
626    VkResult result;
627 
628    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
629       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
630       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
631       .module = vk_shader_module_handle_from_nir(cs),
632       .pName = "main",
633       .pSpecializationInfo = NULL,
634    };
635 
636    VkComputePipelineCreateInfo vk_pipeline_info = {
637       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
638       .stage = pipeline_shader_stage,
639       .flags = 0,
640       .layout = state->itoi.img_p_layout,
641    };
642 
643    result = radv_CreateComputePipelines(radv_device_to_handle(device),
644                                         radv_pipeline_cache_to_handle(&state->cache), 1,
645                                         &vk_pipeline_info, NULL, pipeline);
646    ralloc_free(cs);
647    return result;
648 }
649 
650 /* image to image - don't write use image accessors */
651 static VkResult
radv_device_init_meta_itoi_state(struct radv_device * device)652 radv_device_init_meta_itoi_state(struct radv_device *device)
653 {
654    VkResult result;
655 
656    /*
657     * two descriptors one for the image being sampled
658     * one for the buffer being written.
659     */
660    VkDescriptorSetLayoutCreateInfo ds_create_info = {
661       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
662       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
663       .bindingCount = 2,
664       .pBindings = (VkDescriptorSetLayoutBinding[]){
665          {.binding = 0,
666           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
667           .descriptorCount = 1,
668           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
669           .pImmutableSamplers = NULL},
670          {.binding = 1,
671           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
672           .descriptorCount = 1,
673           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
674           .pImmutableSamplers = NULL},
675       }};
676 
677    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
678                                            &device->meta_state.alloc,
679                                            &device->meta_state.itoi.img_ds_layout);
680    if (result != VK_SUCCESS)
681       goto fail;
682 
683    VkPipelineLayoutCreateInfo pl_create_info = {
684       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
685       .setLayoutCount = 1,
686       .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
687       .pushConstantRangeCount = 1,
688       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
689    };
690 
691    result =
692       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
693                                 &device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
694    if (result != VK_SUCCESS)
695       goto fail;
696 
697    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
698       uint32_t samples = 1 << i;
699       result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
700       if (result != VK_SUCCESS)
701          goto fail;
702    }
703 
704    if (device->physical_device->rad_info.chip_class >= GFX9) {
705       nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
706 
707       VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
708          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
709          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
710          .module = vk_shader_module_handle_from_nir(cs_3d),
711          .pName = "main",
712          .pSpecializationInfo = NULL,
713       };
714 
715       VkComputePipelineCreateInfo vk_pipeline_info_3d = {
716          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
717          .stage = pipeline_shader_stage_3d,
718          .flags = 0,
719          .layout = device->meta_state.itoi.img_p_layout,
720       };
721 
722       result = radv_CreateComputePipelines(
723          radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
724          &vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d);
725       ralloc_free(cs_3d);
726    }
727 
728    return VK_SUCCESS;
729 fail:
730    return result;
731 }
732 
733 static void
radv_device_finish_meta_itoi_state(struct radv_device * device)734 radv_device_finish_meta_itoi_state(struct radv_device *device)
735 {
736    struct radv_meta_state *state = &device->meta_state;
737 
738    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
739                               &state->alloc);
740    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
741                                    &state->alloc);
742 
743    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
744       radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
745    }
746 
747    if (device->physical_device->rad_info.chip_class >= GFX9)
748       radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
749 }
750 
751 static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device * dev)752 build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
753 {
754    const struct glsl_type *type =
755       glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
756    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
757    nir_builder b =
758       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
759    b.shader->info.workgroup_size[0] = 8;
760    b.shader->info.workgroup_size[1] = 8;
761    b.shader->info.workgroup_size[2] = 1;
762    nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
763    input_img->data.descriptor_set = 0;
764    input_img->data.binding = 0;
765 
766    nir_variable *output_img =
767       nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
768    output_img->data.descriptor_set = 0;
769    output_img->data.binding = 1;
770 
771    nir_ssa_def *global_id = get_global_ids(&b, 2);
772 
773    nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
774    nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
775 
776    nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
777    nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
778 
779    nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
780    nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
781 
782    nir_ssa_def *src_global_pos =
783       nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
784                nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
785 
786    nir_ssa_def *dst_global_pos =
787       nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
788                nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
789 
790    for (int chan = 0; chan < 3; chan++) {
791       /* src */
792       nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
793       nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
794 
795       nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
796       tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
797       tex->op = nir_texop_txf;
798       tex->src[0].src_type = nir_tex_src_coord;
799       tex->src[0].src = nir_src_for_ssa(src_local_pos);
800       tex->src[1].src_type = nir_tex_src_lod;
801       tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
802       tex->src[2].src_type = nir_tex_src_texture_deref;
803       tex->src[2].src = nir_src_for_ssa(input_img_deref);
804       tex->dest_type = nir_type_float32;
805       tex->is_array = false;
806       tex->coord_components = 1;
807       nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
808       nir_builder_instr_insert(&b, &tex->instr);
809 
810       nir_ssa_def *outval = &tex->dest.ssa;
811 
812       /* dst */
813       nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
814 
815       nir_ssa_def *dst_coord =
816          nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);
817 
818       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
819                             nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
820                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
821    }
822 
823    return b.shader;
824 }
825 
826 /* Image to image - special path for R32G32B32 */
827 static VkResult
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device * device)828 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
829 {
830    VkResult result;
831    nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
832 
833    VkDescriptorSetLayoutCreateInfo ds_create_info = {
834       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
835       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
836       .bindingCount = 2,
837       .pBindings = (VkDescriptorSetLayoutBinding[]){
838          {.binding = 0,
839           .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
840           .descriptorCount = 1,
841           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
842           .pImmutableSamplers = NULL},
843          {.binding = 1,
844           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
845           .descriptorCount = 1,
846           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
847           .pImmutableSamplers = NULL},
848       }};
849 
850    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
851                                            &device->meta_state.alloc,
852                                            &device->meta_state.itoi_r32g32b32.img_ds_layout);
853    if (result != VK_SUCCESS)
854       goto fail;
855 
856    VkPipelineLayoutCreateInfo pl_create_info = {
857       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
858       .setLayoutCount = 1,
859       .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
860       .pushConstantRangeCount = 1,
861       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
862    };
863 
864    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
865                                       &device->meta_state.alloc,
866                                       &device->meta_state.itoi_r32g32b32.img_p_layout);
867    if (result != VK_SUCCESS)
868       goto fail;
869 
870    /* compute shader */
871 
872    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
873       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
874       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
875       .module = vk_shader_module_handle_from_nir(cs),
876       .pName = "main",
877       .pSpecializationInfo = NULL,
878    };
879 
880    VkComputePipelineCreateInfo vk_pipeline_info = {
881       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
882       .stage = pipeline_shader_stage,
883       .flags = 0,
884       .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
885    };
886 
887    result = radv_CreateComputePipelines(
888       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
889       &vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);
890 
891 fail:
892    ralloc_free(cs);
893    return result;
894 }
895 
896 static void
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device * device)897 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
898 {
899    struct radv_meta_state *state = &device->meta_state;
900 
901    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
902                               &state->alloc);
903    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
904                                    state->itoi_r32g32b32.img_ds_layout, &state->alloc);
905    radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
906                         &state->alloc);
907 }
908 
909 static nir_shader *
build_nir_cleari_compute_shader(struct radv_device * dev,bool is_3d,int samples)910 build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
911 {
912    bool is_multisampled = samples > 1;
913    enum glsl_sampler_dim dim = is_3d             ? GLSL_SAMPLER_DIM_3D
914                                : is_multisampled ? GLSL_SAMPLER_DIM_MS
915                                                  : GLSL_SAMPLER_DIM_2D;
916    const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
917    nir_builder b = nir_builder_init_simple_shader(
918       MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
919    b.shader->info.workgroup_size[0] = 8;
920    b.shader->info.workgroup_size[1] = 8;
921    b.shader->info.workgroup_size[2] = 1;
922 
923    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
924    output_img->data.descriptor_set = 0;
925    output_img->data.binding = 0;
926 
927    nir_ssa_def *global_id = get_global_ids(&b, 2);
928 
929    nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20);
930    nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
931 
932    nir_ssa_def *comps[4];
933    comps[0] = nir_channel(&b, global_id, 0);
934    comps[1] = nir_channel(&b, global_id, 1);
935    comps[2] = layer;
936    comps[3] = nir_ssa_undef(&b, 1, 32);
937    global_id = nir_vec(&b, comps, 4);
938 
939    for (uint32_t i = 0; i < samples; i++) {
940       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
941                             nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0), .image_dim = dim);
942    }
943 
944    return b.shader;
945 }
946 
947 static VkResult
create_cleari_pipeline(struct radv_device * device,int samples,VkPipeline * pipeline)948 create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
949 {
950    nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
951    VkResult result;
952 
953    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
954       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
955       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
956       .module = vk_shader_module_handle_from_nir(cs),
957       .pName = "main",
958       .pSpecializationInfo = NULL,
959    };
960 
961    VkComputePipelineCreateInfo vk_pipeline_info = {
962       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
963       .stage = pipeline_shader_stage,
964       .flags = 0,
965       .layout = device->meta_state.cleari.img_p_layout,
966    };
967 
968    result = radv_CreateComputePipelines(radv_device_to_handle(device),
969                                         radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
970                                         &vk_pipeline_info, NULL, pipeline);
971    ralloc_free(cs);
972    return result;
973 }
974 
975 static VkResult
radv_device_init_meta_cleari_state(struct radv_device * device)976 radv_device_init_meta_cleari_state(struct radv_device *device)
977 {
978    VkResult result;
979 
980    /*
981     * two descriptors one for the image being sampled
982     * one for the buffer being written.
983     */
984    VkDescriptorSetLayoutCreateInfo ds_create_info = {
985       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
986       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
987       .bindingCount = 1,
988       .pBindings = (VkDescriptorSetLayoutBinding[]){
989          {.binding = 0,
990           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
991           .descriptorCount = 1,
992           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
993           .pImmutableSamplers = NULL},
994       }};
995 
996    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
997                                            &device->meta_state.alloc,
998                                            &device->meta_state.cleari.img_ds_layout);
999    if (result != VK_SUCCESS)
1000       goto fail;
1001 
1002    VkPipelineLayoutCreateInfo pl_create_info = {
1003       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1004       .setLayoutCount = 1,
1005       .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1006       .pushConstantRangeCount = 1,
1007       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1008    };
1009 
1010    result =
1011       radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
1012                                 &device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
1013    if (result != VK_SUCCESS)
1014       goto fail;
1015 
1016    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
1017       uint32_t samples = 1 << i;
1018       result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
1019       if (result != VK_SUCCESS)
1020          goto fail;
1021    }
1022 
1023    if (device->physical_device->rad_info.chip_class >= GFX9) {
1024       nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
1025 
1026       /* compute shader */
1027       VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1028          .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1029          .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1030          .module = vk_shader_module_handle_from_nir(cs_3d),
1031          .pName = "main",
1032          .pSpecializationInfo = NULL,
1033       };
1034 
1035       VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1036          .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1037          .stage = pipeline_shader_stage_3d,
1038          .flags = 0,
1039          .layout = device->meta_state.cleari.img_p_layout,
1040       };
1041 
1042       result = radv_CreateComputePipelines(
1043          radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1044          &vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d);
1045       ralloc_free(cs_3d);
1046    }
1047 
1048    return VK_SUCCESS;
1049 fail:
1050    return result;
1051 }
1052 
1053 static void
radv_device_finish_meta_cleari_state(struct radv_device * device)1054 radv_device_finish_meta_cleari_state(struct radv_device *device)
1055 {
1056    struct radv_meta_state *state = &device->meta_state;
1057 
1058    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
1059                               &state->alloc);
1060    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
1061                                    &state->alloc);
1062 
1063    for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
1064       radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
1065    }
1066 
1067    radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
1068 }
1069 
1070 /* Special path for clearing R32G32B32 images using a compute shader. */
1071 static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device * dev)1072 build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
1073 {
1074    const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
1075    nir_builder b =
1076       nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
1077    b.shader->info.workgroup_size[0] = 8;
1078    b.shader->info.workgroup_size[1] = 8;
1079    b.shader->info.workgroup_size[2] = 1;
1080 
1081    nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
1082    output_img->data.descriptor_set = 0;
1083    output_img->data.binding = 0;
1084 
1085    nir_ssa_def *global_id = get_global_ids(&b, 2);
1086 
1087    nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
1088    nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
1089 
1090    nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
1091    nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
1092 
1093    nir_ssa_def *global_pos =
1094       nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));
1095 
1096    for (unsigned chan = 0; chan < 3; chan++) {
1097       nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
1098 
1099       nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
1100 
1101       nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
1102                             nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
1103                             nir_imm_int(&b, 0), .image_dim = GLSL_SAMPLER_DIM_BUF);
1104    }
1105 
1106    return b.shader;
1107 }
1108 
1109 static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device * device)1110 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1111 {
1112    VkResult result;
1113    nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
1114 
1115    VkDescriptorSetLayoutCreateInfo ds_create_info = {
1116       .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1117       .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1118       .bindingCount = 1,
1119       .pBindings = (VkDescriptorSetLayoutBinding[]){
1120          {.binding = 0,
1121           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1122           .descriptorCount = 1,
1123           .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1124           .pImmutableSamplers = NULL},
1125       }};
1126 
1127    result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
1128                                            &device->meta_state.alloc,
1129                                            &device->meta_state.cleari_r32g32b32.img_ds_layout);
1130    if (result != VK_SUCCESS)
1131       goto fail;
1132 
1133    VkPipelineLayoutCreateInfo pl_create_info = {
1134       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1135       .setLayoutCount = 1,
1136       .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1137       .pushConstantRangeCount = 1,
1138       .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1139    };
1140 
1141    result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
1142                                       &device->meta_state.alloc,
1143                                       &device->meta_state.cleari_r32g32b32.img_p_layout);
1144    if (result != VK_SUCCESS)
1145       goto fail;
1146 
1147    /* compute shader */
1148    VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1149       .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1150       .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1151       .module = vk_shader_module_handle_from_nir(cs),
1152       .pName = "main",
1153       .pSpecializationInfo = NULL,
1154    };
1155 
1156    VkComputePipelineCreateInfo vk_pipeline_info = {
1157       .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1158       .stage = pipeline_shader_stage,
1159       .flags = 0,
1160       .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1161    };
1162 
1163    result = radv_CreateComputePipelines(
1164       radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1165       &vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);
1166 
1167 fail:
1168    ralloc_free(cs);
1169    return result;
1170 }
1171 
1172 static void
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device * device)1173 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1174 {
1175    struct radv_meta_state *state = &device->meta_state;
1176 
1177    radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
1178                               &state->alloc);
1179    radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1180                                    state->cleari_r32g32b32.img_ds_layout, &state->alloc);
1181    radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
1182                         &state->alloc);
1183 }
1184 
1185 void
radv_device_finish_meta_bufimage_state(struct radv_device * device)1186 radv_device_finish_meta_bufimage_state(struct radv_device *device)
1187 {
1188    radv_device_finish_meta_itob_state(device);
1189    radv_device_finish_meta_btoi_state(device);
1190    radv_device_finish_meta_btoi_r32g32b32_state(device);
1191    radv_device_finish_meta_itoi_state(device);
1192    radv_device_finish_meta_itoi_r32g32b32_state(device);
1193    radv_device_finish_meta_cleari_state(device);
1194    radv_device_finish_meta_cleari_r32g32b32_state(device);
1195 }
1196 
1197 VkResult
radv_device_init_meta_bufimage_state(struct radv_device * device)1198 radv_device_init_meta_bufimage_state(struct radv_device *device)
1199 {
1200    VkResult result;
1201 
1202    result = radv_device_init_meta_itob_state(device);
1203    if (result != VK_SUCCESS)
1204       goto fail_itob;
1205 
1206    result = radv_device_init_meta_btoi_state(device);
1207    if (result != VK_SUCCESS)
1208       goto fail_btoi;
1209 
1210    result = radv_device_init_meta_btoi_r32g32b32_state(device);
1211    if (result != VK_SUCCESS)
1212       goto fail_btoi_r32g32b32;
1213 
1214    result = radv_device_init_meta_itoi_state(device);
1215    if (result != VK_SUCCESS)
1216       goto fail_itoi;
1217 
1218    result = radv_device_init_meta_itoi_r32g32b32_state(device);
1219    if (result != VK_SUCCESS)
1220       goto fail_itoi_r32g32b32;
1221 
1222    result = radv_device_init_meta_cleari_state(device);
1223    if (result != VK_SUCCESS)
1224       goto fail_cleari;
1225 
1226    result = radv_device_init_meta_cleari_r32g32b32_state(device);
1227    if (result != VK_SUCCESS)
1228       goto fail_cleari_r32g32b32;
1229 
1230    return VK_SUCCESS;
1231 fail_cleari_r32g32b32:
1232    radv_device_finish_meta_cleari_r32g32b32_state(device);
1233 fail_cleari:
1234    radv_device_finish_meta_cleari_state(device);
1235 fail_itoi_r32g32b32:
1236    radv_device_finish_meta_itoi_r32g32b32_state(device);
1237 fail_itoi:
1238    radv_device_finish_meta_itoi_state(device);
1239 fail_btoi_r32g32b32:
1240    radv_device_finish_meta_btoi_r32g32b32_state(device);
1241 fail_btoi:
1242    radv_device_finish_meta_btoi_state(device);
1243 fail_itob:
1244    radv_device_finish_meta_itob_state(device);
1245    return result;
1246 }
1247 
1248 static void
create_iview(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,struct radv_image_view * iview,VkFormat format,VkImageAspectFlagBits aspects)1249 create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
1250              struct radv_image_view *iview, VkFormat format, VkImageAspectFlagBits aspects)
1251 {
1252    VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
1253                                   ? VK_IMAGE_VIEW_TYPE_2D
1254                                   : radv_meta_get_view_type(surf->image);
1255 
1256    if (format == VK_FORMAT_UNDEFINED)
1257       format = surf->format;
1258 
1259    radv_image_view_init(iview, cmd_buffer->device,
1260                         &(VkImageViewCreateInfo){
1261                            .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1262                            .image = radv_image_to_handle(surf->image),
1263                            .viewType = view_type,
1264                            .format = format,
1265                            .subresourceRange = {.aspectMask = aspects,
1266                                                 .baseMipLevel = surf->level,
1267                                                 .levelCount = 1,
1268                                                 .baseArrayLayer = surf->layer,
1269                                                 .layerCount = 1},
1270                         },
1271                         &(struct radv_image_view_extra_create_info){
1272                            .disable_compression = surf->disable_compression,
1273                         });
1274 }
1275 
1276 static void
create_bview(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat format,struct radv_buffer_view * bview)1277 create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
1278              VkFormat format, struct radv_buffer_view *bview)
1279 {
1280    radv_buffer_view_init(bview, cmd_buffer->device,
1281                          &(VkBufferViewCreateInfo){
1282                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1283                             .flags = 0,
1284                             .buffer = radv_buffer_to_handle(buffer),
1285                             .format = format,
1286                             .offset = offset,
1287                             .range = VK_WHOLE_SIZE,
1288                          });
1289 }
1290 
1291 static void
create_buffer_from_image(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf,VkBufferUsageFlagBits usage,VkBuffer * buffer)1292 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
1293                          VkBufferUsageFlagBits usage, VkBuffer *buffer)
1294 {
1295    struct radv_device *device = cmd_buffer->device;
1296    struct radv_device_memory mem;
1297 
1298    radv_device_memory_init(&mem, device, surf->image->bo);
1299 
1300    radv_CreateBuffer(radv_device_to_handle(device),
1301                      &(VkBufferCreateInfo){
1302                         .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1303                         .flags = 0,
1304                         .size = surf->image->size,
1305                         .usage = usage,
1306                         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1307                      },
1308                      NULL, buffer);
1309 
1310    radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1311                           (VkBindBufferMemoryInfo[]){{
1312                              .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1313                              .buffer = *buffer,
1314                              .memory = radv_device_memory_to_handle(&mem),
1315                              .memoryOffset = surf->image->offset,
1316                           }});
1317 
1318    radv_device_memory_finish(&mem);
1319 }
1320 
1321 static void
create_bview_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer * buffer,unsigned offset,VkFormat src_format,struct radv_buffer_view * bview)1322 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
1323                            unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
1324 {
1325    VkFormat format;
1326 
1327    switch (src_format) {
1328    case VK_FORMAT_R32G32B32_UINT:
1329       format = VK_FORMAT_R32_UINT;
1330       break;
1331    case VK_FORMAT_R32G32B32_SINT:
1332       format = VK_FORMAT_R32_SINT;
1333       break;
1334    case VK_FORMAT_R32G32B32_SFLOAT:
1335       format = VK_FORMAT_R32_SFLOAT;
1336       break;
1337    default:
1338       unreachable("invalid R32G32B32 format");
1339    }
1340 
1341    radv_buffer_view_init(bview, cmd_buffer->device,
1342                          &(VkBufferViewCreateInfo){
1343                             .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1344                             .flags = 0,
1345                             .buffer = radv_buffer_to_handle(buffer),
1346                             .format = format,
1347                             .offset = offset,
1348                             .range = VK_WHOLE_SIZE,
1349                          });
1350 }
1351 
1352 static unsigned
get_image_stride_for_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * surf)1353 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1354                                struct radv_meta_blit2d_surf *surf)
1355 {
1356    unsigned stride;
1357 
1358    if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1359       stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1360    } else {
1361       stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1362    }
1363 
1364    return stride;
1365 }
1366 
1367 static void
itob_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_buffer_view * dst)1368 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
1369                       struct radv_buffer_view *dst)
1370 {
1371    struct radv_device *device = cmd_buffer->device;
1372 
1373    radv_meta_push_descriptor_set(
1374       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
1375       2, /* descriptorWriteCount */
1376       (VkWriteDescriptorSet[]){
1377          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1378           .dstBinding = 0,
1379           .dstArrayElement = 0,
1380           .descriptorCount = 1,
1381           .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1382           .pImageInfo =
1383              (VkDescriptorImageInfo[]){
1384                 {
1385                    .sampler = VK_NULL_HANDLE,
1386                    .imageView = radv_image_view_to_handle(src),
1387                    .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1388                 },
1389              }},
1390          {
1391             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1392             .dstBinding = 1,
1393             .dstArrayElement = 0,
1394             .descriptorCount = 1,
1395             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1396             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1397          }});
1398 }
1399 
1400 void
radv_meta_image_to_buffer(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_buffer * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1401 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
1402                           struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
1403                           struct radv_meta_blit2d_rect *rects)
1404 {
1405    VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1406    struct radv_device *device = cmd_buffer->device;
1407    struct radv_image_view src_view;
1408    struct radv_buffer_view dst_view;
1409 
1410    create_iview(cmd_buffer, src, &src_view, VK_FORMAT_UNDEFINED, src->aspect_mask);
1411    create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1412    itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1413 
1414    if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
1415       pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1416 
1417    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1418                         pipeline);
1419 
1420    for (unsigned r = 0; r < num_rects; ++r) {
1421       unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
1422       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1423                             device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1424                             16, push_constants);
1425 
1426       radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1427    }
1428 
1429    radv_image_view_finish(&src_view);
1430    radv_buffer_view_finish(&dst_view);
1431 }
1432 
1433 static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1434 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1435                                 struct radv_buffer_view *dst)
1436 {
1437    struct radv_device *device = cmd_buffer->device;
1438 
1439    radv_meta_push_descriptor_set(
1440       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
1441       0, /* set */
1442       2, /* descriptorWriteCount */
1443       (VkWriteDescriptorSet[]){
1444          {
1445             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1446             .dstBinding = 0,
1447             .dstArrayElement = 0,
1448             .descriptorCount = 1,
1449             .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1450             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1451          },
1452          {
1453             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1454             .dstBinding = 1,
1455             .dstArrayElement = 0,
1456             .descriptorCount = 1,
1457             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1458             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1459          }});
1460 }
1461 
1462 static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1463 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1464                                        struct radv_meta_blit2d_buffer *src,
1465                                        struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1466                                        struct radv_meta_blit2d_rect *rects)
1467 {
1468    VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1469    struct radv_device *device = cmd_buffer->device;
1470    struct radv_buffer_view src_view, dst_view;
1471    unsigned dst_offset = 0;
1472    unsigned stride;
1473    VkBuffer buffer;
1474 
1475    /* This special btoi path for R32G32B32 formats will write the linear
1476     * image as a buffer with the same underlying memory. The compute
1477     * shader will copy all components separately using a R32 format.
1478     */
1479    create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
1480 
1481    create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1482    create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
1483                               &dst_view);
1484    btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1485 
1486    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1487                         pipeline);
1488 
1489    stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1490 
1491    for (unsigned r = 0; r < num_rects; ++r) {
1492       unsigned push_constants[4] = {
1493          rects[r].dst_x,
1494          rects[r].dst_y,
1495          stride,
1496          src->pitch,
1497       };
1498 
1499       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1500                             device->meta_state.btoi_r32g32b32.img_p_layout,
1501                             VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1502 
1503       radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1504    }
1505 
1506    radv_buffer_view_finish(&src_view);
1507    radv_buffer_view_finish(&dst_view);
1508    radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1509 }
1510 
1511 static void
btoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_image_view * dst)1512 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1513                       struct radv_image_view *dst)
1514 {
1515    struct radv_device *device = cmd_buffer->device;
1516 
1517    radv_meta_push_descriptor_set(
1518       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
1519       2, /* descriptorWriteCount */
1520       (VkWriteDescriptorSet[]){
1521          {
1522             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1523             .dstBinding = 0,
1524             .dstArrayElement = 0,
1525             .descriptorCount = 1,
1526             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1527             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1528          },
1529          {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1530           .dstBinding = 1,
1531           .dstArrayElement = 0,
1532           .descriptorCount = 1,
1533           .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1534           .pImageInfo = (VkDescriptorImageInfo[]){
1535              {
1536                 .sampler = VK_NULL_HANDLE,
1537                 .imageView = radv_image_view_to_handle(dst),
1538                 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1539              },
1540           }}});
1541 }
1542 
1543 void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_buffer * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1544 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1545                              struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
1546                              unsigned num_rects, struct radv_meta_blit2d_rect *rects)
1547 {
1548    VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1549    struct radv_device *device = cmd_buffer->device;
1550    struct radv_buffer_view src_view;
1551    struct radv_image_view dst_view;
1552 
1553    if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1554        dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1555        dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1556       radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
1557       return;
1558    }
1559 
1560    create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1561    create_iview(cmd_buffer, dst, &dst_view, VK_FORMAT_UNDEFINED, dst->aspect_mask);
1562    btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1563 
1564    if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
1565       pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1566    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1567                         pipeline);
1568 
1569    for (unsigned r = 0; r < num_rects; ++r) {
1570       unsigned push_constants[4] = {
1571          rects[r].dst_x,
1572          rects[r].dst_y,
1573          dst->layer,
1574          src->pitch,
1575       };
1576       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1577                             device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1578                             16, push_constants);
1579 
1580       radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1581    }
1582 
1583    radv_image_view_finish(&dst_view);
1584    radv_buffer_view_finish(&src_view);
1585 }
1586 
1587 static void
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * src,struct radv_buffer_view * dst)1588 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1589                                 struct radv_buffer_view *dst)
1590 {
1591    struct radv_device *device = cmd_buffer->device;
1592 
1593    radv_meta_push_descriptor_set(
1594       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
1595       0, /* set */
1596       2, /* descriptorWriteCount */
1597       (VkWriteDescriptorSet[]){
1598          {
1599             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1600             .dstBinding = 0,
1601             .dstArrayElement = 0,
1602             .descriptorCount = 1,
1603             .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1604             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1605          },
1606          {
1607             .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1608             .dstBinding = 1,
1609             .dstArrayElement = 0,
1610             .descriptorCount = 1,
1611             .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1612             .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1613          }});
1614 }
1615 
1616 static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1617 radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1618                                       struct radv_meta_blit2d_surf *src,
1619                                       struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1620                                       struct radv_meta_blit2d_rect *rects)
1621 {
1622    VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1623    struct radv_device *device = cmd_buffer->device;
1624    struct radv_buffer_view src_view, dst_view;
1625    unsigned src_offset = 0, dst_offset = 0;
1626    unsigned src_stride, dst_stride;
1627    VkBuffer src_buffer, dst_buffer;
1628 
1629    /* 96-bit formats are only compatible to themselves. */
1630    assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
1631           dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1632 
1633    /* This special itoi path for R32G32B32 formats will write the linear
1634     * image as a buffer with the same underlying memory. The compute
1635     * shader will copy all components separately using a R32 format.
1636     */
1637    create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
1638    create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
1639 
1640    create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
1641                               src->format, &src_view);
1642    create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
1643                               dst->format, &dst_view);
1644    itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1645 
1646    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1647                         pipeline);
1648 
1649    src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1650    dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1651 
1652    for (unsigned r = 0; r < num_rects; ++r) {
1653       unsigned push_constants[6] = {
1654          rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
1655       };
1656       radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1657                             device->meta_state.itoi_r32g32b32.img_p_layout,
1658                             VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
1659 
1660       radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1661    }
1662 
1663    radv_buffer_view_finish(&src_view);
1664    radv_buffer_view_finish(&dst_view);
1665    radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1666    radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1667 }
1668 
1669 static void
itoi_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * src,struct radv_image_view * dst)1670 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
1671                       struct radv_image_view *dst)
1672 {
1673    struct radv_device *device = cmd_buffer->device;
1674 
1675    radv_meta_push_descriptor_set(
1676       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
1677       2, /* descriptorWriteCount */
1678       (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1679                                 .dstBinding = 0,
1680                                 .dstArrayElement = 0,
1681                                 .descriptorCount = 1,
1682                                 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1683                                 .pImageInfo =
1684                                    (VkDescriptorImageInfo[]){
1685                                       {
1686                                          .sampler = VK_NULL_HANDLE,
1687                                          .imageView = radv_image_view_to_handle(src),
1688                                          .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1689                                       },
1690                                    }},
1691                                {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1692                                 .dstBinding = 1,
1693                                 .dstArrayElement = 0,
1694                                 .descriptorCount = 1,
1695                                 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1696                                 .pImageInfo = (VkDescriptorImageInfo[]){
1697                                    {
1698                                       .sampler = VK_NULL_HANDLE,
1699                                       .imageView = radv_image_view_to_handle(dst),
1700                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1701                                    },
1702                                 }}});
1703 }
1704 
1705 void
radv_meta_image_to_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * src,struct radv_meta_blit2d_surf * dst,unsigned num_rects,struct radv_meta_blit2d_rect * rects)1706 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
1707                             struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1708                             struct radv_meta_blit2d_rect *rects)
1709 {
1710    struct radv_device *device = cmd_buffer->device;
1711    struct radv_image_view src_view, dst_view;
1712    uint32_t samples = src->image->info.samples;
1713    uint32_t samples_log2 = ffs(samples) - 1;
1714 
1715    if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
1716        src->format == VK_FORMAT_R32G32B32_SFLOAT) {
1717       radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
1718       return;
1719    }
1720 
1721    u_foreach_bit(i, dst->aspect_mask) {
1722       unsigned aspect_mask = 1u << i;
1723       VkFormat depth_format = 0;
1724       if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
1725          depth_format = vk_format_stencil_only(dst->image->vk_format);
1726       else if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
1727          depth_format = vk_format_depth_only(dst->image->vk_format);
1728 
1729       create_iview(cmd_buffer, src, &src_view, depth_format, aspect_mask);
1730       create_iview(cmd_buffer, dst, &dst_view, depth_format, aspect_mask);
1731 
1732       itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1733 
1734       VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
1735       if (device->physical_device->rad_info.chip_class >= GFX9 &&
1736           (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
1737          pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
1738       radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1739                            pipeline);
1740 
1741       for (unsigned r = 0; r < num_rects; ++r) {
1742          unsigned push_constants[6] = {
1743             rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
1744          };
1745          radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1746                                device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1747                                24, push_constants);
1748 
1749          radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1750       }
1751 
1752       radv_image_view_finish(&src_view);
1753       radv_image_view_finish(&dst_view);
1754    }
1755 }
1756 
1757 static void
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_buffer_view * view)1758 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
1759 {
1760    struct radv_device *device = cmd_buffer->device;
1761 
1762    radv_meta_push_descriptor_set(
1763       cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
1764       0, /* set */
1765       1, /* descriptorWriteCount */
1766       (VkWriteDescriptorSet[]){{
1767          .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1768          .dstBinding = 0,
1769          .dstArrayElement = 0,
1770          .descriptorCount = 1,
1771          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1772          .pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
1773       }});
1774 }
1775 
1776 static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)1777 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1778                                    struct radv_meta_blit2d_surf *dst,
1779                                    const VkClearColorValue *clear_color)
1780 {
1781    VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
1782    struct radv_device *device = cmd_buffer->device;
1783    struct radv_buffer_view dst_view;
1784    unsigned stride;
1785    VkBuffer buffer;
1786 
1787    /* This special clear path for R32G32B32 formats will write the linear
1788     * image as a buffer with the same underlying memory. The compute
1789     * shader will clear all components separately using a R32 format.
1790     */
1791    create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
1792 
1793    create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
1794                               &dst_view);
1795    cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
1796 
1797    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1798                         pipeline);
1799 
1800    stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1801 
1802    unsigned push_constants[4] = {
1803       clear_color->uint32[0],
1804       clear_color->uint32[1],
1805       clear_color->uint32[2],
1806       stride,
1807    };
1808 
1809    radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1810                          device->meta_state.cleari_r32g32b32.img_p_layout,
1811                          VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1812 
1813    radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1814 
1815    radv_buffer_view_finish(&dst_view);
1816    radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1817 }
1818 
1819 static void
cleari_bind_descriptors(struct radv_cmd_buffer * cmd_buffer,struct radv_image_view * dst_iview)1820 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
1821 {
1822    struct radv_device *device = cmd_buffer->device;
1823 
1824    radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1825                                  device->meta_state.cleari.img_p_layout, 0, /* set */
1826                                  1, /* descriptorWriteCount */
1827                                  (VkWriteDescriptorSet[]){
1828                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1829                                      .dstBinding = 0,
1830                                      .dstArrayElement = 0,
1831                                      .descriptorCount = 1,
1832                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1833                                      .pImageInfo =
1834                                         (VkDescriptorImageInfo[]){
1835                                            {
1836                                               .sampler = VK_NULL_HANDLE,
1837                                               .imageView = radv_image_view_to_handle(dst_iview),
1838                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1839                                            },
1840                                         }},
1841                                  });
1842 }
1843 
1844 void
radv_meta_clear_image_cs(struct radv_cmd_buffer * cmd_buffer,struct radv_meta_blit2d_surf * dst,const VkClearColorValue * clear_color)1845 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
1846                          const VkClearColorValue *clear_color)
1847 {
1848    struct radv_device *device = cmd_buffer->device;
1849    struct radv_image_view dst_iview;
1850    uint32_t samples = dst->image->info.samples;
1851    uint32_t samples_log2 = ffs(samples) - 1;
1852 
1853    if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
1854        dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
1855       radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
1856       return;
1857    }
1858 
1859    create_iview(cmd_buffer, dst, &dst_iview, VK_FORMAT_UNDEFINED, dst->aspect_mask);
1860    cleari_bind_descriptors(cmd_buffer, &dst_iview);
1861 
1862    VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
1863    if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
1864       pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
1865 
1866    radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1867                         pipeline);
1868 
1869    unsigned push_constants[5] = {
1870       clear_color->uint32[0],
1871       clear_color->uint32[1],
1872       clear_color->uint32[2],
1873       clear_color->uint32[3],
1874       dst->layer,
1875    };
1876 
1877    radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1878                          device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
1879                          push_constants);
1880 
1881    radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1882 
1883    radv_image_view_finish(&dst_iview);
1884 }
1885