• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2020 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30 
31 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)32 get_hw_clear_color(struct v3dv_device *device,
33                    const VkClearColorValue *color,
34                    VkFormat fb_format,
35                    VkFormat image_format,
36                    uint32_t internal_type,
37                    uint32_t internal_bpp,
38                    uint32_t *hw_color)
39 {
40    const uint32_t internal_size = 4 << internal_bpp;
41 
42    /* If the image format doesn't match the framebuffer format, then we are
43     * trying to clear an unsupported tlb format using a compatible
44     * format for the framebuffer. In this case, we want to make sure that
45     * we pack the clear value according to the original format semantics,
46     * not the compatible format.
47     */
48    if (fb_format == image_format) {
49       v3d_X((&device->devinfo), get_hw_clear_color)(color, internal_type, internal_size,
50                                          hw_color);
51    } else {
52       union util_color uc;
53       enum pipe_format pipe_image_format =
54          vk_format_to_pipe_format(image_format);
55       util_pack_color(color->float32, pipe_image_format, &uc);
56       memcpy(hw_color, uc.ui, internal_size);
57    }
58 }
59 
60 /* Returns true if the implementation is able to handle the case, false
61  * otherwise.
62 */
63 static bool
clear_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * image,const VkClearValue * clear_value,const VkImageSubresourceRange * range)64 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
65                 struct v3dv_image *image,
66                 const VkClearValue *clear_value,
67                 const VkImageSubresourceRange *range)
68 {
69    const VkOffset3D origin = { 0, 0, 0 };
70    VkFormat fb_format;
71 
72    /* From vkCmdClearColorImage spec:
73     *  "image must not use any of the formats that require a sampler YCBCR
74     *   conversion"
75     */
76    assert(image->plane_count == 1);
77    if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
78       return false;
79 
80    uint32_t internal_type, internal_bpp;
81    v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
82       (fb_format, range->aspectMask,
83        &internal_type, &internal_bpp);
84 
85    union v3dv_clear_value hw_clear_value = { 0 };
86    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
87       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
88                          image->vk.format, internal_type, internal_bpp,
89                          &hw_clear_value.color[0]);
90    } else {
91       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
92              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
93       hw_clear_value.z = clear_value->depthStencil.depth;
94       hw_clear_value.s = clear_value->depthStencil.stencil;
95    }
96 
97    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
98    uint32_t min_level = range->baseMipLevel;
99    uint32_t max_level = range->baseMipLevel + level_count;
100 
101    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
102     * Instead, we need to consider the full depth dimension of the image, which
103     * goes from 0 up to the level's depth extent.
104     */
105    uint32_t min_layer;
106    uint32_t max_layer;
107    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
108       min_layer = range->baseArrayLayer;
109       max_layer = range->baseArrayLayer +
110                   vk_image_subresource_layer_count(&image->vk, range);
111    } else {
112       min_layer = 0;
113       max_layer = 0;
114    }
115 
116    for (uint32_t level = min_level; level < max_level; level++) {
117       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
118          max_layer = u_minify(image->vk.extent.depth, level);
119 
120       uint32_t width = u_minify(image->vk.extent.width, level);
121       uint32_t height = u_minify(image->vk.extent.height, level);
122 
123       struct v3dv_job *job =
124          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
125 
126       if (!job)
127          return true;
128 
129       v3dv_job_start_frame(job, width, height, max_layer,
130                            false, true, 1, internal_bpp,
131                            4 * v3d_internal_bpp_words(internal_bpp),
132                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
133 
134       struct v3dv_meta_framebuffer framebuffer;
135       v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
136                                                  internal_type,
137                                                  &job->frame_tiling);
138 
139       v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);
140 
141       /* If this triggers it is an application bug: the spec requires
142        * that any aspects to clear are present in the image.
143        */
144       assert(range->aspectMask & image->vk.aspects);
145 
146       v3d_X((&job->device->devinfo), meta_emit_clear_image_rcl)
147          (job, image, &framebuffer, &hw_clear_value,
148           range->aspectMask, min_layer, max_layer, level);
149 
150       v3dv_cmd_buffer_finish_job(cmd_buffer);
151    }
152 
153    return true;
154 }
155 
156 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)157 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
158                         VkImage _image,
159                         VkImageLayout imageLayout,
160                         const VkClearColorValue *pColor,
161                         uint32_t rangeCount,
162                         const VkImageSubresourceRange *pRanges)
163 {
164    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
165    V3DV_FROM_HANDLE(v3dv_image, image, _image);
166 
167    const VkClearValue clear_value = {
168       .color = *pColor,
169    };
170 
171    cmd_buffer->state.is_transfer = true;
172 
173    for (uint32_t i = 0; i < rangeCount; i++) {
174       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
175          continue;
176       unreachable("Unsupported color clear.");
177    }
178 
179    cmd_buffer->state.is_transfer = false;
180 }
181 
182 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)183 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
184                                VkImage _image,
185                                VkImageLayout imageLayout,
186                                const VkClearDepthStencilValue *pDepthStencil,
187                                uint32_t rangeCount,
188                                const VkImageSubresourceRange *pRanges)
189 {
190    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
191    V3DV_FROM_HANDLE(v3dv_image, image, _image);
192 
193    const VkClearValue clear_value = {
194       .depthStencil = *pDepthStencil,
195    };
196 
197    cmd_buffer->state.is_transfer = true;
198 
199    for (uint32_t i = 0; i < rangeCount; i++) {
200       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
201          continue;
202       unreachable("Unsupported depth/stencil clear.");
203    }
204 
205    cmd_buffer->state.is_transfer = false;
206 }
207 
208 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)209 destroy_color_clear_pipeline(VkDevice _device,
210                              uint64_t pipeline,
211                              VkAllocationCallbacks *alloc)
212 {
213    struct v3dv_meta_color_clear_pipeline *p =
214       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
215    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
216    if (p->cached)
217       v3dv_DestroyRenderPass(_device, p->pass, alloc);
218    vk_free(alloc, p);
219 }
220 
221 static void
destroy_depth_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)222 destroy_depth_clear_pipeline(VkDevice _device,
223                              uint64_t pipeline,
224                              VkAllocationCallbacks *alloc)
225 {
226    struct v3dv_meta_depth_clear_pipeline *p =
227      (struct v3dv_meta_depth_clear_pipeline *)(uintptr_t)pipeline;
228    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
229    vk_free(alloc, p);
230 }
231 
232 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)233 create_color_clear_pipeline_layout(struct v3dv_device *device,
234                                    VkPipelineLayout *pipeline_layout)
235 {
236    /* FIXME: this is abusing a bit the API, since not all of our clear
237     * pipelines have a geometry shader. We could create 2 different pipeline
238     * layouts, but this works for us for now.
239     */
240    VkPushConstantRange ranges[2] = {
241       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
242       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
243    };
244 
245    VkPipelineLayoutCreateInfo info = {
246       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
247       .setLayoutCount = 0,
248       .pushConstantRangeCount = 2,
249       .pPushConstantRanges = ranges,
250    };
251 
252    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
253                                     &info, &device->vk.alloc, pipeline_layout);
254 }
255 
256 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)257 create_depth_clear_pipeline_layout(struct v3dv_device *device,
258                                    VkPipelineLayout *pipeline_layout)
259 {
260    /* FIXME: this is abusing a bit the API, since not all of our clear
261     * pipelines have a geometry shader. We could create 2 different pipeline
262     * layouts, but this works for us for now.
263     */
264    VkPushConstantRange ranges[2] = {
265       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
266       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
267    };
268 
269    VkPipelineLayoutCreateInfo info = {
270       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
271       .setLayoutCount = 0,
272       .pushConstantRangeCount = 2,
273       .pPushConstantRanges = ranges
274    };
275 
276    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
277                                     &info, &device->vk.alloc, pipeline_layout);
278 }
279 
280 void
v3dv_meta_clear_init(struct v3dv_device * device)281 v3dv_meta_clear_init(struct v3dv_device *device)
282 {
283    if (device->instance->meta_cache_enabled) {
284       device->meta.color_clear.cache =
285          _mesa_hash_table_create(NULL, u64_hash, u64_compare);
286 
287       device->meta.depth_clear.cache =
288          _mesa_hash_table_create(NULL, u64_hash, u64_compare);
289    }
290 
291    create_color_clear_pipeline_layout(device,
292                                       &device->meta.color_clear.p_layout);
293    create_depth_clear_pipeline_layout(device,
294                                       &device->meta.depth_clear.p_layout);
295 }
296 
297 void
v3dv_meta_clear_finish(struct v3dv_device * device)298 v3dv_meta_clear_finish(struct v3dv_device *device)
299 {
300    VkDevice _device = v3dv_device_to_handle(device);
301 
302    if (device->instance->meta_cache_enabled) {
303       hash_table_foreach(device->meta.color_clear.cache, entry) {
304          struct v3dv_meta_color_clear_pipeline *item = entry->data;
305          destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
306       }
307       _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
308 
309       hash_table_foreach(device->meta.depth_clear.cache, entry) {
310          struct v3dv_meta_depth_clear_pipeline *item = entry->data;
311          destroy_depth_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
312       }
313       _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
314    }
315 
316    if (device->meta.color_clear.p_layout) {
317       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
318                                  &device->vk.alloc);
319    }
320 
321    if (device->meta.depth_clear.p_layout) {
322       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
323                                  &device->vk.alloc);
324    }
325 }
326 
327 static nir_shader *
get_clear_rect_vs(const nir_shader_compiler_options * options)328 get_clear_rect_vs(const nir_shader_compiler_options *options)
329 {
330    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
331                                                   "meta clear vs");
332 
333    const struct glsl_type *vec4 = glsl_vec4_type();
334    nir_variable *vs_out_pos =
335       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
336    vs_out_pos->data.location = VARYING_SLOT_POS;
337 
338    nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
339    nir_store_var(&b, vs_out_pos, pos, 0xf);
340 
341    return b.shader;
342 }
343 
344 static nir_shader *
get_clear_rect_gs(const nir_shader_compiler_options * options,uint32_t push_constant_layer_base)345 get_clear_rect_gs(const nir_shader_compiler_options *options,
346                   uint32_t push_constant_layer_base)
347 {
348    /* FIXME: this creates a geometry shader that takes the index of a single
349     * layer to clear from push constants, so we need to emit a draw call for
350     * each layer that we want to clear. We could actually do better and have it
351     * take a range of layers and then emit one triangle per layer to clear,
352     * however, if we were to do this we would need to be careful not to exceed
353     * the maximum number of output vertices allowed in a geometry shader.
354     */
355    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
356                                                   "meta clear gs");
357    nir_shader *nir = b.shader;
358    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
359    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
360                                (1ull << VARYING_SLOT_LAYER);
361    nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
362    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
363    nir->info.gs.vertices_in = 3;
364    nir->info.gs.vertices_out = 3;
365    nir->info.gs.invocations = 1;
366    nir->info.gs.active_stream_mask = 0x1;
367 
368    /* in vec4 gl_Position[3] */
369    nir_variable *gs_in_pos =
370       nir_variable_create(b.shader, nir_var_shader_in,
371                           glsl_array_type(glsl_vec4_type(), 3, 0),
372                           "in_gl_Position");
373    gs_in_pos->data.location = VARYING_SLOT_POS;
374 
375    /* out vec4 gl_Position */
376    nir_variable *gs_out_pos =
377       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
378                           "out_gl_Position");
379    gs_out_pos->data.location = VARYING_SLOT_POS;
380 
381    /* out float gl_Layer */
382    nir_variable *gs_out_layer =
383       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
384                           "out_gl_Layer");
385    gs_out_layer->data.location = VARYING_SLOT_LAYER;
386 
387    /* Emit output triangle */
388    for (uint32_t i = 0; i < 3; i++) {
389       /* gl_Position from shader input */
390       nir_deref_instr *in_pos_i =
391          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
392       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
393 
394       /* gl_Layer from push constants */
395       nir_def *layer =
396          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
397                                 .base = push_constant_layer_base, .range = 4);
398       nir_store_var(&b, gs_out_layer, layer, 0x1);
399 
400       nir_emit_vertex(&b, 0);
401    }
402 
403    nir_end_primitive(&b, 0);
404 
405    return nir;
406 }
407 
408 static nir_shader *
get_color_clear_rect_fs(const nir_shader_compiler_options * options,uint32_t rt_idx,VkFormat format)409 get_color_clear_rect_fs(const nir_shader_compiler_options *options,
410                         uint32_t rt_idx, VkFormat format)
411 {
412    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
413                                                   "meta clear fs");
414 
415    enum pipe_format pformat = vk_format_to_pipe_format(format);
416    const struct glsl_type *fs_out_type =
417       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
418 
419    nir_variable *fs_out_color =
420       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
421    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
422 
423    nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
424    nir_store_var(&b, fs_out_color, color_load, 0xf);
425 
426    return b.shader;
427 }
428 
429 static nir_shader *
get_depth_clear_rect_fs(const nir_shader_compiler_options * options)430 get_depth_clear_rect_fs(const nir_shader_compiler_options *options)
431 {
432    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
433                                                   "meta depth clear fs");
434 
435    nir_variable *fs_out_depth =
436       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
437                           "out_depth");
438    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
439 
440    nir_def *depth_load =
441       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
442 
443    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
444 
445    return b.shader;
446 }
447 
448 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)449 create_pipeline(struct v3dv_device *device,
450                 struct v3dv_render_pass *pass,
451                 uint32_t subpass_idx,
452                 uint32_t samples,
453                 struct nir_shader *vs_nir,
454                 struct nir_shader *gs_nir,
455                 struct nir_shader *fs_nir,
456                 const VkPipelineVertexInputStateCreateInfo *vi_state,
457                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
458                 const VkPipelineColorBlendStateCreateInfo *cb_state,
459                 const VkPipelineLayout layout,
460                 VkPipeline *pipeline)
461 {
462    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
463    struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
464    struct vk_shader_module gs_m;
465    struct vk_shader_module fs_m;
466 
467    uint32_t stage_count = 0;
468    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
469    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
470    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
471    stages[stage_count].pName = "main";
472    stage_count++;
473 
474    if (gs_nir) {
475       gs_m = vk_shader_module_from_nir(gs_nir);
476       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
477       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
478       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
479       stages[stage_count].pName = "main";
480       stage_count++;
481    }
482 
483    if (fs_nir) {
484       fs_m = vk_shader_module_from_nir(fs_nir);
485       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
486       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
487       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
488       stages[stage_count].pName = "main";
489       stage_count++;
490    }
491 
492    VkGraphicsPipelineCreateInfo info = {
493       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
494 
495       .stageCount = stage_count,
496       .pStages = stages,
497 
498       .pVertexInputState = vi_state,
499 
500       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
501          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
502          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
503          .primitiveRestartEnable = false,
504       },
505 
506       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
507          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
508          .viewportCount = 1,
509          .scissorCount = 1,
510       },
511 
512       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
513          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
514          .rasterizerDiscardEnable = false,
515          .polygonMode = VK_POLYGON_MODE_FILL,
516          .cullMode = VK_CULL_MODE_NONE,
517          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
518          .depthBiasEnable = false,
519       },
520 
521       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
522          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
523          .rasterizationSamples = samples,
524          .sampleShadingEnable = false,
525          .pSampleMask = NULL,
526          .alphaToCoverageEnable = false,
527          .alphaToOneEnable = false,
528       },
529 
530       .pDepthStencilState = ds_state,
531 
532       .pColorBlendState = cb_state,
533 
534       /* The meta clear pipeline declares all state as dynamic.
535        * As a consequence, vkCmdBindPipeline writes no dynamic state
536        * to the cmd buffer. Therefore, at the end of the meta clear,
537        * we need only restore dynamic state that was vkCmdSet.
538        */
539       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
540          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
541          .dynamicStateCount = 6,
542          .pDynamicStates = (VkDynamicState[]) {
543             VK_DYNAMIC_STATE_VIEWPORT,
544             VK_DYNAMIC_STATE_SCISSOR,
545             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
546             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
547             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
548             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
549             VK_DYNAMIC_STATE_DEPTH_BIAS,
550             VK_DYNAMIC_STATE_LINE_WIDTH,
551          },
552       },
553 
554       .flags = 0,
555       .layout = layout,
556       .renderPass = v3dv_render_pass_to_handle(pass),
557       .subpass = subpass_idx,
558    };
559 
560    VkResult result =
561       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
562                                    VK_NULL_HANDLE,
563                                    1, &info,
564                                    &device->vk.alloc,
565                                    pipeline);
566 
567    ralloc_free(vs_nir);
568    ralloc_free(gs_nir);
569    ralloc_free(fs_nir);
570 
571    return result;
572 }
573 
574 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)575 create_color_clear_pipeline(struct v3dv_device *device,
576                             struct v3dv_render_pass *pass,
577                             uint32_t subpass_idx,
578                             uint32_t rt_idx,
579                             VkFormat format,
580                             VkSampleCountFlagBits samples,
581                             uint32_t components,
582                             bool is_layered,
583                             VkPipelineLayout pipeline_layout,
584                             VkPipeline *pipeline)
585 {
586    const nir_shader_compiler_options *options =
587       v3dv_pipeline_get_nir_options(&device->devinfo);
588 
589    nir_shader *vs_nir = get_clear_rect_vs(options);
590    nir_shader *fs_nir = get_color_clear_rect_fs(options, rt_idx, format);
591    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 16) : NULL;
592 
593    const VkPipelineVertexInputStateCreateInfo vi_state = {
594       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
595       .vertexBindingDescriptionCount = 0,
596       .vertexAttributeDescriptionCount = 0,
597    };
598 
599    const VkPipelineDepthStencilStateCreateInfo ds_state = {
600       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
601       .depthTestEnable = false,
602       .depthWriteEnable = false,
603       .depthBoundsTestEnable = false,
604       .stencilTestEnable = false,
605    };
606 
607    assert(subpass_idx < pass->subpass_count);
608    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
609    assert(rt_idx < color_count);
610 
611    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
612    for (uint32_t i = 0; i < color_count; i++) {
613       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
614          .blendEnable = false,
615          .colorWriteMask = i == rt_idx ? components : 0,
616       };
617    }
618 
619    const VkPipelineColorBlendStateCreateInfo cb_state = {
620       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
621       .logicOpEnable = false,
622       .attachmentCount = color_count,
623       .pAttachments = blend_att_state
624    };
625 
626    return create_pipeline(device,
627                           pass, subpass_idx,
628                           samples,
629                           vs_nir, gs_nir, fs_nir,
630                           &vi_state,
631                           &ds_state,
632                           &cb_state,
633                           pipeline_layout,
634                           pipeline);
635 }
636 
637 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)638 create_depth_clear_pipeline(struct v3dv_device *device,
639                             VkImageAspectFlags aspects,
640                             struct v3dv_render_pass *pass,
641                             uint32_t subpass_idx,
642                             uint32_t samples,
643                             bool is_layered,
644                             VkPipelineLayout pipeline_layout,
645                             VkPipeline *pipeline)
646 {
647    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
648    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
649    assert(has_depth || has_stencil);
650 
651    const nir_shader_compiler_options *options =
652       v3dv_pipeline_get_nir_options(&device->devinfo);
653 
654    nir_shader *vs_nir = get_clear_rect_vs(options);
655    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs(options) : NULL;
656    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 4) : NULL;
657 
658    const VkPipelineVertexInputStateCreateInfo vi_state = {
659       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
660       .vertexBindingDescriptionCount = 0,
661       .vertexAttributeDescriptionCount = 0,
662    };
663 
664    const VkPipelineDepthStencilStateCreateInfo ds_state = {
665       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
666       .depthTestEnable = has_depth,
667       .depthWriteEnable = has_depth,
668       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
669       .depthBoundsTestEnable = false,
670       .stencilTestEnable = has_stencil,
671       .front = {
672          .passOp = VK_STENCIL_OP_REPLACE,
673          .compareOp = VK_COMPARE_OP_ALWAYS,
674          /* compareMask, writeMask and reference are dynamic state */
675       },
676       .back = { 0 },
677    };
678 
679    assert(subpass_idx < pass->subpass_count);
680    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
681    const VkPipelineColorBlendStateCreateInfo cb_state = {
682       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
683       .logicOpEnable = false,
684       .attachmentCount = pass->subpasses[subpass_idx].color_count,
685       .pAttachments = blend_att_state,
686    };
687 
688    return create_pipeline(device,
689                           pass, subpass_idx,
690                           samples,
691                           vs_nir, gs_nir, fs_nir,
692                           &vi_state,
693                           &ds_state,
694                           &cb_state,
695                           pipeline_layout,
696                           pipeline);
697 }
698 
699 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,VkRenderPass * pass)700 create_color_clear_render_pass(struct v3dv_device *device,
701                                uint32_t rt_idx,
702                                VkFormat format,
703                                VkSampleCountFlagBits samples,
704                                VkRenderPass *pass)
705 {
706    VkAttachmentDescription2 att = {
707       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
708       .format = format,
709       .samples = samples,
710       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
711       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
712       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
713       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
714    };
715 
716    VkAttachmentReference2 att_ref = {
717       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
718       .attachment = rt_idx,
719       .layout = VK_IMAGE_LAYOUT_GENERAL,
720    };
721 
722    VkSubpassDescription2 subpass = {
723       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
724       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
725       .inputAttachmentCount = 0,
726       .colorAttachmentCount = 1,
727       .pColorAttachments = &att_ref,
728       .pResolveAttachments = NULL,
729       .pDepthStencilAttachment = NULL,
730       .preserveAttachmentCount = 0,
731       .pPreserveAttachments = NULL,
732    };
733 
734    VkRenderPassCreateInfo2 info = {
735       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
736       .attachmentCount = 1,
737       .pAttachments = &att,
738       .subpassCount = 1,
739       .pSubpasses = &subpass,
740       .dependencyCount = 0,
741       .pDependencies = NULL,
742    };
743 
744    return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
745                                  &info, &device->vk.alloc, pass);
746 }
747 
748 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,bool has_multiview)749 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
750                                    VkFormat format,
751                                    VkSampleCountFlagBits samples,
752                                    uint32_t components,
753                                    bool is_layered,
754                                    bool has_multiview)
755 {
756    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
757 
758    uint64_t key = 0;
759    uint32_t bit_offset = 0;
760 
761    key |= rt_idx;
762    bit_offset += 3;
763 
764    key |= ((uint64_t) format) << bit_offset;
765    bit_offset += 32;
766 
767    key |= ((uint64_t) samples) << bit_offset;
768    bit_offset += 4;
769 
770    key |= ((uint64_t) components) << bit_offset;
771    bit_offset += 4;
772 
773    key |= (is_layered ? 1ull : 0ull) << bit_offset;
774    bit_offset += 1;
775 
776    key |= (has_multiview ? 1ull : 0ull) << bit_offset;
777    bit_offset += 1;
778 
779    assert(bit_offset <= 64);
780    return key;
781 }
782 
783 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered,bool has_multiview)784 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
785                                    VkFormat format,
786                                    uint32_t samples,
787                                    bool is_layered,
788                                    bool has_multiview)
789 {
790    uint64_t key = 0;
791    uint32_t bit_offset = 0;
792 
793    key |= format;
794    bit_offset += 32;
795 
796    key |= ((uint64_t) samples) << bit_offset;
797    bit_offset += 4;
798 
799    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
800    key |= ((uint64_t) has_depth) << bit_offset;
801    bit_offset++;
802 
803    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
804    key |= ((uint64_t) has_stencil) << bit_offset;
805    bit_offset++;;
806 
807    key |= (is_layered ? 1ull : 0ull) << bit_offset;
808    bit_offset += 1;
809 
810    key |= (has_multiview ? 1ull : 0ull) << bit_offset;
811    bit_offset += 1;
812 
813    assert(bit_offset <= 64);
814    return key;
815 }
816 
817 static VkResult
get_color_clear_pipeline(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,bool has_multiview,struct v3dv_meta_color_clear_pipeline ** pipeline)818 get_color_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
819                          struct v3dv_render_pass *pass,
820                          uint32_t subpass_idx,
821                          uint32_t rt_idx,
822                          uint32_t attachment_idx,
823                          VkFormat format,
824                          VkSampleCountFlagBits samples,
825                          uint32_t components,
826                          bool is_layered,
827                          bool has_multiview,
828                          struct v3dv_meta_color_clear_pipeline **pipeline)
829 {
830    assert(vk_format_is_color(format));
831    struct v3dv_device *device = cmd_buffer->device;
832 
833    VkResult result = VK_SUCCESS;
834 
835    /* If pass != NULL it means that we are emitting the clear as a draw call
836     * in the current pass bound by the application. In that case, we can't
837     * cache the pipeline, since it will be referencing that pass and the
838     * application could be destroying it at any point. Hopefully, the perf
839     * impact is not too big since we still have the device pipeline cache
840     * around and we won't end up re-compiling the clear shader.
841     *
842     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
843     * provided by the application and include it in the pipeline key setup
844     * to make caching safe in this scenario, however, based on tests with
845     * vkQuake3, the fact that we are not caching here doesn't seem to have
846     * any significant impact in performance, so it might not be worth it.
847     */
848    const bool can_cache_pipeline =
849       (pass == NULL) && (device->instance->meta_cache_enabled);
850 
851    uint64_t key;
852    if (can_cache_pipeline) {
853       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
854                                                components, is_layered,
855                                                has_multiview);
856       mtx_lock(&device->meta.mtx);
857       struct hash_entry *entry =
858          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
859       if (entry) {
860          mtx_unlock(&device->meta.mtx);
861          *pipeline = entry->data;
862          return VK_SUCCESS;
863       }
864    }
865 
866    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
867                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
868 
869    if (*pipeline == NULL) {
870       result = VK_ERROR_OUT_OF_HOST_MEMORY;
871       goto fail;
872    }
873 
874    if (!pass) {
875       result = create_color_clear_render_pass(device,
876                                               rt_idx,
877                                               format,
878                                               samples,
879                                               &(*pipeline)->pass);
880       if (result != VK_SUCCESS)
881          goto fail;
882 
883       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
884    } else {
885       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
886    }
887 
888    result = create_color_clear_pipeline(device,
889                                         pass,
890                                         subpass_idx,
891                                         rt_idx,
892                                         format,
893                                         samples,
894                                         components,
895                                         is_layered,
896                                         device->meta.color_clear.p_layout,
897                                         &(*pipeline)->pipeline);
898    if (result != VK_SUCCESS)
899       goto fail;
900 
901    if (can_cache_pipeline) {
902       (*pipeline)->key = key;
903       (*pipeline)->cached = true;
904       _mesa_hash_table_insert(device->meta.color_clear.cache,
905                               &(*pipeline)->key, *pipeline);
906 
907       mtx_unlock(&device->meta.mtx);
908    } else {
909       v3dv_cmd_buffer_add_private_obj(
910          cmd_buffer, (uintptr_t)*pipeline,
911          (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_color_clear_pipeline);
912    }
913 
914    return VK_SUCCESS;
915 
916 fail:
917    if (can_cache_pipeline)
918       mtx_unlock(&device->meta.mtx);
919 
920    VkDevice _device = v3dv_device_to_handle(device);
921    if (*pipeline) {
922       if ((*pipeline)->cached)
923          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
924       if ((*pipeline)->pipeline)
925          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
926       vk_free(&device->vk.alloc, *pipeline);
927       *pipeline = NULL;
928    }
929 
930    return result;
931 }
932 
933 static VkResult
get_depth_clear_pipeline(struct v3dv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,bool has_multiview,struct v3dv_meta_depth_clear_pipeline ** pipeline)934 get_depth_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
935                          VkImageAspectFlags aspects,
936                          struct v3dv_render_pass *pass,
937                          uint32_t subpass_idx,
938                          uint32_t attachment_idx,
939                          bool is_layered,
940                          bool has_multiview,
941                          struct v3dv_meta_depth_clear_pipeline **pipeline)
942 {
943    assert(subpass_idx < pass->subpass_count);
944    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
945    assert(attachment_idx < pass->attachment_count);
946 
947    VkResult result = VK_SUCCESS;
948    struct v3dv_device *device = cmd_buffer->device;
949 
950    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
951    const VkFormat format = pass->attachments[attachment_idx].desc.format;
952    assert(vk_format_is_depth_or_stencil(format));
953 
954    uint64_t key;
955    bool meta_cache_enabled = device->instance->meta_cache_enabled;
956 
957    if (meta_cache_enabled) {
958       key = get_depth_clear_pipeline_cache_key(aspects, format, samples,
959                                                is_layered, has_multiview);
960       mtx_lock(&device->meta.mtx);
961       struct hash_entry *entry =
962          _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
963       if (entry) {
964          mtx_unlock(&device->meta.mtx);
965          *pipeline = entry->data;
966          return VK_SUCCESS;
967       }
968    }
969 
970    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
971                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
972 
973    if (*pipeline == NULL) {
974       result = VK_ERROR_OUT_OF_HOST_MEMORY;
975       goto fail;
976    }
977 
978    result = create_depth_clear_pipeline(device,
979                                         aspects,
980                                         pass,
981                                         subpass_idx,
982                                         samples,
983                                         is_layered,
984                                         device->meta.depth_clear.p_layout,
985                                         &(*pipeline)->pipeline);
986    if (result != VK_SUCCESS)
987       goto fail;
988 
989    if (meta_cache_enabled) {
990       (*pipeline)->key = key;
991       _mesa_hash_table_insert(device->meta.depth_clear.cache,
992                               &(*pipeline)->key, *pipeline);
993       mtx_unlock(&device->meta.mtx);
994    } else {
995       v3dv_cmd_buffer_add_private_obj(
996          cmd_buffer, (uintptr_t)*pipeline,
997          (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_depth_clear_pipeline);
998    }
999 
1000    return VK_SUCCESS;
1001 
1002 fail:
1003    if (device->instance->meta_cache_enabled)
1004       mtx_unlock(&device->meta.mtx);
1005 
1006    VkDevice _device = v3dv_device_to_handle(device);
1007    if (*pipeline) {
1008       if ((*pipeline)->pipeline)
1009          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1010       vk_free(&device->vk.alloc, *pipeline);
1011       *pipeline = NULL;
1012    }
1013 
1014    return result;
1015 }
1016 
1017 /* Emits a scissored quad in the clear color */
1018 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1019 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1020                                struct v3dv_render_pass *pass,
1021                                struct v3dv_subpass *subpass,
1022                                uint32_t rt_idx,
1023                                const VkClearColorValue *clear_color,
1024                                bool is_layered,
1025                                bool all_rects_same_layers,
1026                                uint32_t rect_count,
1027                                const VkClearRect *rects)
1028 {
1029    /* Skip if attachment is unused in the current subpass */
1030    assert(rt_idx < subpass->color_count);
1031    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1032    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1033       return;
1034 
1035    /* Obtain a pipeline for this clear */
1036    assert(attachment_idx < pass->attachment_count);
1037    const VkFormat format = pass->attachments[attachment_idx].desc.format;
1038    const VkSampleCountFlagBits samples =
1039       pass->attachments[attachment_idx].desc.samples;
1040    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1041                                VK_COLOR_COMPONENT_G_BIT |
1042                                VK_COLOR_COMPONENT_B_BIT |
1043                                VK_COLOR_COMPONENT_A_BIT;
1044 
1045    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1046    VkResult result = get_color_clear_pipeline(cmd_buffer,
1047                                               pass,
1048                                               cmd_buffer->state.subpass_idx,
1049                                               rt_idx,
1050                                               attachment_idx,
1051                                               format,
1052                                               samples,
1053                                               components,
1054                                               is_layered,
1055                                               pass->multiview_enabled,
1056                                               &pipeline);
1057    if (result != VK_SUCCESS) {
1058       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1059          v3dv_flag_oom(cmd_buffer, NULL);
1060       return;
1061    }
1062    assert(pipeline && pipeline->pipeline);
1063 
1064    /* Emit clear rects */
1065    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1066 
1067    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1068    v3dv_CmdPushConstants(cmd_buffer_handle,
1069                          cmd_buffer->device->meta.depth_clear.p_layout,
1070                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1071                          clear_color->float32);
1072 
1073    v3dv_CmdBindPipeline(cmd_buffer_handle,
1074                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1075                         pipeline->pipeline);
1076 
1077    for (uint32_t i = 0; i < rect_count; i++) {
1078       const VkViewport viewport = {
1079          .x = rects[i].rect.offset.x,
1080          .y = rects[i].rect.offset.y,
1081          .width = rects[i].rect.extent.width,
1082          .height = rects[i].rect.extent.height,
1083          .minDepth = 0.0f,
1084          .maxDepth = 1.0f
1085       };
1086       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1087       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1088 
1089       if (is_layered) {
1090          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1091               layer_offset++) {
1092             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1093             v3dv_CmdPushConstants(cmd_buffer_handle,
1094                                   cmd_buffer->device->meta.depth_clear.p_layout,
1095                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1096             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1097          }
1098       } else {
1099          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1100          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1101       }
1102    }
1103 
1104    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1105 }
1106 
1107 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1108  * and the stencil aspect by using stencil testing.
1109  */
1110 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1111 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1112                             struct v3dv_render_pass *pass,
1113                             struct v3dv_subpass *subpass,
1114                             VkImageAspectFlags aspects,
1115                             const VkClearDepthStencilValue *clear_ds,
1116                             bool is_layered,
1117                             bool all_rects_same_layers,
1118                             uint32_t rect_count,
1119                             const VkClearRect *rects)
1120 {
1121    /* Skip if attachment is unused in the current subpass */
1122    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1123    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1124       return;
1125 
1126    /* Obtain a pipeline for this clear */
1127    assert(attachment_idx < pass->attachment_count);
1128    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1129 
1130    VkResult result = get_depth_clear_pipeline(cmd_buffer,
1131                                               aspects,
1132                                               pass,
1133                                               cmd_buffer->state.subpass_idx,
1134                                               attachment_idx,
1135                                               is_layered,
1136                                               pass->multiview_enabled,
1137                                               &pipeline);
1138    if (result != VK_SUCCESS) {
1139       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1140          v3dv_flag_oom(cmd_buffer, NULL);
1141       return;
1142    }
1143    assert(pipeline && pipeline->pipeline);
1144 
1145    /* Emit clear rects */
1146    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1147 
1148    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1149    v3dv_CmdPushConstants(cmd_buffer_handle,
1150                          cmd_buffer->device->meta.depth_clear.p_layout,
1151                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1152                          &clear_ds->depth);
1153 
1154    v3dv_CmdBindPipeline(cmd_buffer_handle,
1155                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1156                         pipeline->pipeline);
1157 
1158    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1159       vk_common_CmdSetStencilReference(cmd_buffer_handle,
1160                                        VK_STENCIL_FACE_FRONT_AND_BACK,
1161                                        clear_ds->stencil);
1162       vk_common_CmdSetStencilWriteMask(cmd_buffer_handle,
1163                                        VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1164       vk_common_CmdSetStencilCompareMask(cmd_buffer_handle,
1165                                          VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1166    }
1167 
1168    for (uint32_t i = 0; i < rect_count; i++) {
1169       const VkViewport viewport = {
1170          .x = rects[i].rect.offset.x,
1171          .y = rects[i].rect.offset.y,
1172          .width = rects[i].rect.extent.width,
1173          .height = rects[i].rect.extent.height,
1174          .minDepth = 0.0f,
1175          .maxDepth = 1.0f
1176       };
1177       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1178       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1179       if (is_layered) {
1180          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1181               layer_offset++) {
1182             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1183             v3dv_CmdPushConstants(cmd_buffer_handle,
1184                                   cmd_buffer->device->meta.depth_clear.p_layout,
1185                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1186             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1187          }
1188       } else {
1189          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1190          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1191       }
1192    }
1193 
1194    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1195 }
1196 
1197 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1198 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1199                      bool *is_layered, bool *all_rects_same_layers)
1200 {
1201    *all_rects_same_layers = true;
1202 
1203    uint32_t min_layer = rects[0].baseArrayLayer;
1204    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1205    for (uint32_t i = 1; i < rect_count; i++) {
1206       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1207           rects[i].layerCount != rects[i - 1].layerCount) {
1208          *all_rects_same_layers = false;
1209          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1210          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1211                                      rects[i].layerCount - 1);
1212       }
1213    }
1214 
1215    *is_layered = !(min_layer == 0 && max_layer == 0);
1216 }
1217 
1218 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1219 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1220                          uint32_t attachmentCount,
1221                          const VkClearAttachment *pAttachments,
1222                          uint32_t rectCount,
1223                          const VkClearRect *pRects)
1224 {
1225    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1226 
1227    /* We can have at most max_color_RTs + 1 D/S attachments */
1228    assert(attachmentCount <=
1229           V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
1230 
1231    /* We can only clear attachments in the current subpass */
1232    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1233 
1234    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1235    struct v3dv_subpass *subpass =
1236       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1237 
1238    /* Emit a clear rect inside the current job for this subpass. For layered
1239     * framebuffers, we use a geometry shader to redirect clears to the
1240     * appropriate layers.
1241     */
1242 
1243    v3dv_cmd_buffer_pause_occlusion_query(cmd_buffer);
1244 
1245    bool is_layered, all_rects_same_layers;
1246    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1247    for (uint32_t i = 0; i < attachmentCount; i++) {
1248       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1249          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1250                                         pAttachments[i].colorAttachment,
1251                                         &pAttachments[i].clearValue.color,
1252                                         is_layered, all_rects_same_layers,
1253                                         rectCount, pRects);
1254       } else {
1255          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1256                                      pAttachments[i].aspectMask,
1257                                      &pAttachments[i].clearValue.depthStencil,
1258                                      is_layered, all_rects_same_layers,
1259                                      rectCount, pRects);
1260       }
1261    }
1262 
1263    v3dv_cmd_buffer_resume_occlusion_query(cmd_buffer);
1264 }
1265