/*
 * Copyright © 2020 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23 
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26 
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 
30 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)31 get_hw_clear_color(struct v3dv_device *device,
32                    const VkClearColorValue *color,
33                    VkFormat fb_format,
34                    VkFormat image_format,
35                    uint32_t internal_type,
36                    uint32_t internal_bpp,
37                    uint32_t *hw_color)
38 {
39    const uint32_t internal_size = 4 << internal_bpp;
40 
41    /* If the image format doesn't match the framebuffer format, then we are
42     * trying to clear an unsupported tlb format using a compatible
43     * format for the framebuffer. In this case, we want to make sure that
44     * we pack the clear value according to the original format semantics,
45     * not the compatible format.
46     */
47    if (fb_format == image_format) {
48       v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size,
49                                          hw_color);
50    } else {
51       union util_color uc;
52       enum pipe_format pipe_image_format =
53          vk_format_to_pipe_format(image_format);
54       util_pack_color(color->float32, pipe_image_format, &uc);
55       memcpy(hw_color, uc.ui, internal_size);
56    }
57 }
58 
59 /* Returns true if the implementation is able to handle the case, false
60  * otherwise.
61 */
62 static bool
clear_image_tlb(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_image * image,const VkClearValue * clear_value,const VkImageSubresourceRange * range)63 clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
64                 struct v3dv_image *image,
65                 const VkClearValue *clear_value,
66                 const VkImageSubresourceRange *range)
67 {
68    const VkOffset3D origin = { 0, 0, 0 };
69    VkFormat fb_format;
70 
71    /* From vkCmdClearColorImage spec:
72     *  "image must not use any of the formats that require a sampler YCBCR
73     *   conversion"
74     */
75    assert(image->plane_count == 1);
76    if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
77       return false;
78 
79    uint32_t internal_type, internal_bpp;
80    v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
81       (fb_format, range->aspectMask,
82        &internal_type, &internal_bpp);
83 
84    union v3dv_clear_value hw_clear_value = { 0 };
85    if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
86       get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
87                          image->vk.format, internal_type, internal_bpp,
88                          &hw_clear_value.color[0]);
89    } else {
90       assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
91              (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
92       hw_clear_value.z = clear_value->depthStencil.depth;
93       hw_clear_value.s = clear_value->depthStencil.stencil;
94    }
95 
96    uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
97    uint32_t min_level = range->baseMipLevel;
98    uint32_t max_level = range->baseMipLevel + level_count;
99 
100    /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
101     * Instead, we need to consider the full depth dimension of the image, which
102     * goes from 0 up to the level's depth extent.
103     */
104    uint32_t min_layer;
105    uint32_t max_layer;
106    if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
107       min_layer = range->baseArrayLayer;
108       max_layer = range->baseArrayLayer +
109                   vk_image_subresource_layer_count(&image->vk, range);
110    } else {
111       min_layer = 0;
112       max_layer = 0;
113    }
114 
115    for (uint32_t level = min_level; level < max_level; level++) {
116       if (image->vk.image_type == VK_IMAGE_TYPE_3D)
117          max_layer = u_minify(image->vk.extent.depth, level);
118 
119       uint32_t width = u_minify(image->vk.extent.width, level);
120       uint32_t height = u_minify(image->vk.extent.height, level);
121 
122       struct v3dv_job *job =
123          v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
124 
125       if (!job)
126          return true;
127 
128       v3dv_job_start_frame(job, width, height, max_layer,
129                            false, true, 1, internal_bpp,
130                            4 * v3d_internal_bpp_words(internal_bpp),
131                            image->vk.samples > VK_SAMPLE_COUNT_1_BIT);
132 
133       struct v3dv_meta_framebuffer framebuffer;
134       v3dv_X(job->device, meta_framebuffer_init)(&framebuffer, fb_format,
135                                                  internal_type,
136                                                  &job->frame_tiling);
137 
138       v3dv_X(job->device, job_emit_binning_flush)(job);
139 
140       /* If this triggers it is an application bug: the spec requires
141        * that any aspects to clear are present in the image.
142        */
143       assert(range->aspectMask & image->vk.aspects);
144 
145       v3dv_X(job->device, meta_emit_clear_image_rcl)
146          (job, image, &framebuffer, &hw_clear_value,
147           range->aspectMask, min_layer, max_layer, level);
148 
149       v3dv_cmd_buffer_finish_job(cmd_buffer);
150    }
151 
152    return true;
153 }
154 
155 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)156 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
157                         VkImage _image,
158                         VkImageLayout imageLayout,
159                         const VkClearColorValue *pColor,
160                         uint32_t rangeCount,
161                         const VkImageSubresourceRange *pRanges)
162 {
163    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
164    V3DV_FROM_HANDLE(v3dv_image, image, _image);
165 
166    const VkClearValue clear_value = {
167       .color = *pColor,
168    };
169 
170    cmd_buffer->state.is_transfer = true;
171 
172    for (uint32_t i = 0; i < rangeCount; i++) {
173       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
174          continue;
175       unreachable("Unsupported color clear.");
176    }
177 
178    cmd_buffer->state.is_transfer = false;
179 }
180 
181 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)182 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
183                                VkImage _image,
184                                VkImageLayout imageLayout,
185                                const VkClearDepthStencilValue *pDepthStencil,
186                                uint32_t rangeCount,
187                                const VkImageSubresourceRange *pRanges)
188 {
189    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
190    V3DV_FROM_HANDLE(v3dv_image, image, _image);
191 
192    const VkClearValue clear_value = {
193       .depthStencil = *pDepthStencil,
194    };
195 
196    cmd_buffer->state.is_transfer = true;
197 
198    for (uint32_t i = 0; i < rangeCount; i++) {
199       if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
200          continue;
201       unreachable("Unsupported depth/stencil clear.");
202    }
203 
204    cmd_buffer->state.is_transfer = false;
205 }
206 
207 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)208 destroy_color_clear_pipeline(VkDevice _device,
209                              uint64_t pipeline,
210                              VkAllocationCallbacks *alloc)
211 {
212    struct v3dv_meta_color_clear_pipeline *p =
213       (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
214    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
215    if (p->cached)
216       v3dv_DestroyRenderPass(_device, p->pass, alloc);
217    vk_free(alloc, p);
218 }
219 
220 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)221 destroy_depth_clear_pipeline(VkDevice _device,
222                              struct v3dv_meta_depth_clear_pipeline *p,
223                              VkAllocationCallbacks *alloc)
224 {
225    v3dv_DestroyPipeline(_device, p->pipeline, alloc);
226    vk_free(alloc, p);
227 }
228 
229 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)230 create_color_clear_pipeline_layout(struct v3dv_device *device,
231                                    VkPipelineLayout *pipeline_layout)
232 {
233    /* FIXME: this is abusing a bit the API, since not all of our clear
234     * pipelines have a geometry shader. We could create 2 different pipeline
235     * layouts, but this works for us for now.
236     */
237    VkPushConstantRange ranges[2] = {
238       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
239       { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
240    };
241 
242    VkPipelineLayoutCreateInfo info = {
243       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
244       .setLayoutCount = 0,
245       .pushConstantRangeCount = 2,
246       .pPushConstantRanges = ranges,
247    };
248 
249    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
250                                     &info, &device->vk.alloc, pipeline_layout);
251 }
252 
253 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)254 create_depth_clear_pipeline_layout(struct v3dv_device *device,
255                                    VkPipelineLayout *pipeline_layout)
256 {
257    /* FIXME: this is abusing a bit the API, since not all of our clear
258     * pipelines have a geometry shader. We could create 2 different pipeline
259     * layouts, but this works for us for now.
260     */
261    VkPushConstantRange ranges[2] = {
262       { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
263       { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
264    };
265 
266    VkPipelineLayoutCreateInfo info = {
267       .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
268       .setLayoutCount = 0,
269       .pushConstantRangeCount = 2,
270       .pPushConstantRanges = ranges
271    };
272 
273    return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
274                                     &info, &device->vk.alloc, pipeline_layout);
275 }
276 
277 void
v3dv_meta_clear_init(struct v3dv_device * device)278 v3dv_meta_clear_init(struct v3dv_device *device)
279 {
280    device->meta.color_clear.cache =
281       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
282 
283    create_color_clear_pipeline_layout(device,
284                                       &device->meta.color_clear.p_layout);
285 
286    device->meta.depth_clear.cache =
287       _mesa_hash_table_create(NULL, u64_hash, u64_compare);
288 
289    create_depth_clear_pipeline_layout(device,
290                                       &device->meta.depth_clear.p_layout);
291 }
292 
293 void
v3dv_meta_clear_finish(struct v3dv_device * device)294 v3dv_meta_clear_finish(struct v3dv_device *device)
295 {
296    VkDevice _device = v3dv_device_to_handle(device);
297 
298    hash_table_foreach(device->meta.color_clear.cache, entry) {
299       struct v3dv_meta_color_clear_pipeline *item = entry->data;
300       destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
301    }
302    _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
303 
304    if (device->meta.color_clear.p_layout) {
305       v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
306                                  &device->vk.alloc);
307    }
308 
309    hash_table_foreach(device->meta.depth_clear.cache, entry) {
310       struct v3dv_meta_depth_clear_pipeline *item = entry->data;
311       destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);
312    }
313    _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
314 
315    if (device->meta.depth_clear.p_layout) {
316       v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
317                                  &device->vk.alloc);
318    }
319 }
320 
321 static nir_shader *
get_clear_rect_vs()322 get_clear_rect_vs()
323 {
324    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
325    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
326                                                   "meta clear vs");
327 
328    const struct glsl_type *vec4 = glsl_vec4_type();
329    nir_variable *vs_out_pos =
330       nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
331    vs_out_pos->data.location = VARYING_SLOT_POS;
332 
333    nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
334    nir_store_var(&b, vs_out_pos, pos, 0xf);
335 
336    return b.shader;
337 }
338 
339 static nir_shader *
get_clear_rect_gs(uint32_t push_constant_layer_base)340 get_clear_rect_gs(uint32_t push_constant_layer_base)
341 {
342    /* FIXME: this creates a geometry shader that takes the index of a single
343     * layer to clear from push constants, so we need to emit a draw call for
344     * each layer that we want to clear. We could actually do better and have it
345     * take a range of layers and then emit one triangle per layer to clear,
346     * however, if we were to do this we would need to be careful not to exceed
347     * the maximum number of output vertices allowed in a geometry shader.
348     */
349    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
350    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
351                                                   "meta clear gs");
352    nir_shader *nir = b.shader;
353    nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
354    nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
355                                (1ull << VARYING_SLOT_LAYER);
356    nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
357    nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
358    nir->info.gs.vertices_in = 3;
359    nir->info.gs.vertices_out = 3;
360    nir->info.gs.invocations = 1;
361    nir->info.gs.active_stream_mask = 0x1;
362 
363    /* in vec4 gl_Position[3] */
364    nir_variable *gs_in_pos =
365       nir_variable_create(b.shader, nir_var_shader_in,
366                           glsl_array_type(glsl_vec4_type(), 3, 0),
367                           "in_gl_Position");
368    gs_in_pos->data.location = VARYING_SLOT_POS;
369 
370    /* out vec4 gl_Position */
371    nir_variable *gs_out_pos =
372       nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
373                           "out_gl_Position");
374    gs_out_pos->data.location = VARYING_SLOT_POS;
375 
376    /* out float gl_Layer */
377    nir_variable *gs_out_layer =
378       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
379                           "out_gl_Layer");
380    gs_out_layer->data.location = VARYING_SLOT_LAYER;
381 
382    /* Emit output triangle */
383    for (uint32_t i = 0; i < 3; i++) {
384       /* gl_Position from shader input */
385       nir_deref_instr *in_pos_i =
386          nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
387       nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
388 
389       /* gl_Layer from push constants */
390       nir_def *layer =
391          nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
392                                 .base = push_constant_layer_base, .range = 4);
393       nir_store_var(&b, gs_out_layer, layer, 0x1);
394 
395       nir_emit_vertex(&b, 0);
396    }
397 
398    nir_end_primitive(&b, 0);
399 
400    return nir;
401 }
402 
403 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)404 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
405 {
406    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
407    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
408                                                   "meta clear fs");
409 
410    enum pipe_format pformat = vk_format_to_pipe_format(format);
411    const struct glsl_type *fs_out_type =
412       util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
413 
414    nir_variable *fs_out_color =
415       nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
416    fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
417 
418    nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
419    nir_store_var(&b, fs_out_color, color_load, 0xf);
420 
421    return b.shader;
422 }
423 
424 static nir_shader *
get_depth_clear_rect_fs()425 get_depth_clear_rect_fs()
426 {
427    const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
428    nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
429                                                   "meta depth clear fs");
430 
431    nir_variable *fs_out_depth =
432       nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
433                           "out_depth");
434    fs_out_depth->data.location = FRAG_RESULT_DEPTH;
435 
436    nir_def *depth_load =
437       nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
438 
439    nir_store_var(&b, fs_out_depth, depth_load, 0x1);
440 
441    return b.shader;
442 }
443 
444 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)445 create_pipeline(struct v3dv_device *device,
446                 struct v3dv_render_pass *pass,
447                 uint32_t subpass_idx,
448                 uint32_t samples,
449                 struct nir_shader *vs_nir,
450                 struct nir_shader *gs_nir,
451                 struct nir_shader *fs_nir,
452                 const VkPipelineVertexInputStateCreateInfo *vi_state,
453                 const VkPipelineDepthStencilStateCreateInfo *ds_state,
454                 const VkPipelineColorBlendStateCreateInfo *cb_state,
455                 const VkPipelineLayout layout,
456                 VkPipeline *pipeline)
457 {
458    VkPipelineShaderStageCreateInfo stages[3] = { 0 };
459    struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
460    struct vk_shader_module gs_m;
461    struct vk_shader_module fs_m;
462 
463    uint32_t stage_count = 0;
464    stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
465    stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
466    stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
467    stages[stage_count].pName = "main";
468    stage_count++;
469 
470    if (gs_nir) {
471       gs_m = vk_shader_module_from_nir(gs_nir);
472       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
473       stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
474       stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
475       stages[stage_count].pName = "main";
476       stage_count++;
477    }
478 
479    if (fs_nir) {
480       fs_m = vk_shader_module_from_nir(fs_nir);
481       stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
482       stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
483       stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
484       stages[stage_count].pName = "main";
485       stage_count++;
486    }
487 
488    VkGraphicsPipelineCreateInfo info = {
489       .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
490 
491       .stageCount = stage_count,
492       .pStages = stages,
493 
494       .pVertexInputState = vi_state,
495 
496       .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
497          .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
498          .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
499          .primitiveRestartEnable = false,
500       },
501 
502       .pViewportState = &(VkPipelineViewportStateCreateInfo) {
503          .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
504          .viewportCount = 1,
505          .scissorCount = 1,
506       },
507 
508       .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
509          .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
510          .rasterizerDiscardEnable = false,
511          .polygonMode = VK_POLYGON_MODE_FILL,
512          .cullMode = VK_CULL_MODE_NONE,
513          .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
514          .depthBiasEnable = false,
515       },
516 
517       .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
518          .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
519          .rasterizationSamples = samples,
520          .sampleShadingEnable = false,
521          .pSampleMask = NULL,
522          .alphaToCoverageEnable = false,
523          .alphaToOneEnable = false,
524       },
525 
526       .pDepthStencilState = ds_state,
527 
528       .pColorBlendState = cb_state,
529 
530       /* The meta clear pipeline declares all state as dynamic.
531        * As a consequence, vkCmdBindPipeline writes no dynamic state
532        * to the cmd buffer. Therefore, at the end of the meta clear,
533        * we need only restore dynamic state that was vkCmdSet.
534        */
535       .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
536          .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
537          .dynamicStateCount = 6,
538          .pDynamicStates = (VkDynamicState[]) {
539             VK_DYNAMIC_STATE_VIEWPORT,
540             VK_DYNAMIC_STATE_SCISSOR,
541             VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
542             VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
543             VK_DYNAMIC_STATE_STENCIL_REFERENCE,
544             VK_DYNAMIC_STATE_BLEND_CONSTANTS,
545             VK_DYNAMIC_STATE_DEPTH_BIAS,
546             VK_DYNAMIC_STATE_LINE_WIDTH,
547          },
548       },
549 
550       .flags = 0,
551       .layout = layout,
552       .renderPass = v3dv_render_pass_to_handle(pass),
553       .subpass = subpass_idx,
554    };
555 
556    VkResult result =
557       v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
558                                    VK_NULL_HANDLE,
559                                    1, &info,
560                                    &device->vk.alloc,
561                                    pipeline);
562 
563    ralloc_free(vs_nir);
564    ralloc_free(gs_nir);
565    ralloc_free(fs_nir);
566 
567    return result;
568 }
569 
570 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)571 create_color_clear_pipeline(struct v3dv_device *device,
572                             struct v3dv_render_pass *pass,
573                             uint32_t subpass_idx,
574                             uint32_t rt_idx,
575                             VkFormat format,
576                             VkSampleCountFlagBits samples,
577                             uint32_t components,
578                             bool is_layered,
579                             VkPipelineLayout pipeline_layout,
580                             VkPipeline *pipeline)
581 {
582    nir_shader *vs_nir = get_clear_rect_vs();
583    nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
584    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;
585 
586    const VkPipelineVertexInputStateCreateInfo vi_state = {
587       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
588       .vertexBindingDescriptionCount = 0,
589       .vertexAttributeDescriptionCount = 0,
590    };
591 
592    const VkPipelineDepthStencilStateCreateInfo ds_state = {
593       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
594       .depthTestEnable = false,
595       .depthWriteEnable = false,
596       .depthBoundsTestEnable = false,
597       .stencilTestEnable = false,
598    };
599 
600    assert(subpass_idx < pass->subpass_count);
601    const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
602    assert(rt_idx < color_count);
603 
604    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
605    for (uint32_t i = 0; i < color_count; i++) {
606       blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
607          .blendEnable = false,
608          .colorWriteMask = i == rt_idx ? components : 0,
609       };
610    }
611 
612    const VkPipelineColorBlendStateCreateInfo cb_state = {
613       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
614       .logicOpEnable = false,
615       .attachmentCount = color_count,
616       .pAttachments = blend_att_state
617    };
618 
619    return create_pipeline(device,
620                           pass, subpass_idx,
621                           samples,
622                           vs_nir, gs_nir, fs_nir,
623                           &vi_state,
624                           &ds_state,
625                           &cb_state,
626                           pipeline_layout,
627                           pipeline);
628 }
629 
630 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)631 create_depth_clear_pipeline(struct v3dv_device *device,
632                             VkImageAspectFlags aspects,
633                             struct v3dv_render_pass *pass,
634                             uint32_t subpass_idx,
635                             uint32_t samples,
636                             bool is_layered,
637                             VkPipelineLayout pipeline_layout,
638                             VkPipeline *pipeline)
639 {
640    const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
641    const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
642    assert(has_depth || has_stencil);
643 
644    nir_shader *vs_nir = get_clear_rect_vs();
645    nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
646    nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;
647 
648    const VkPipelineVertexInputStateCreateInfo vi_state = {
649       .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
650       .vertexBindingDescriptionCount = 0,
651       .vertexAttributeDescriptionCount = 0,
652    };
653 
654    const VkPipelineDepthStencilStateCreateInfo ds_state = {
655       .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
656       .depthTestEnable = has_depth,
657       .depthWriteEnable = has_depth,
658       .depthCompareOp = VK_COMPARE_OP_ALWAYS,
659       .depthBoundsTestEnable = false,
660       .stencilTestEnable = has_stencil,
661       .front = {
662          .passOp = VK_STENCIL_OP_REPLACE,
663          .compareOp = VK_COMPARE_OP_ALWAYS,
664          /* compareMask, writeMask and reference are dynamic state */
665       },
666       .back = { 0 },
667    };
668 
669    assert(subpass_idx < pass->subpass_count);
670    VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
671    const VkPipelineColorBlendStateCreateInfo cb_state = {
672       .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
673       .logicOpEnable = false,
674       .attachmentCount = pass->subpasses[subpass_idx].color_count,
675       .pAttachments = blend_att_state,
676    };
677 
678    return create_pipeline(device,
679                           pass, subpass_idx,
680                           samples,
681                           vs_nir, gs_nir, fs_nir,
682                           &vi_state,
683                           &ds_state,
684                           &cb_state,
685                           pipeline_layout,
686                           pipeline);
687 }
688 
689 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,VkRenderPass * pass)690 create_color_clear_render_pass(struct v3dv_device *device,
691                                uint32_t rt_idx,
692                                VkFormat format,
693                                VkSampleCountFlagBits samples,
694                                VkRenderPass *pass)
695 {
696    VkAttachmentDescription2 att = {
697       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
698       .format = format,
699       .samples = samples,
700       .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
701       .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
702       .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
703       .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
704    };
705 
706    VkAttachmentReference2 att_ref = {
707       .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
708       .attachment = rt_idx,
709       .layout = VK_IMAGE_LAYOUT_GENERAL,
710    };
711 
712    VkSubpassDescription2 subpass = {
713       .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
714       .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
715       .inputAttachmentCount = 0,
716       .colorAttachmentCount = 1,
717       .pColorAttachments = &att_ref,
718       .pResolveAttachments = NULL,
719       .pDepthStencilAttachment = NULL,
720       .preserveAttachmentCount = 0,
721       .pPreserveAttachments = NULL,
722    };
723 
724    VkRenderPassCreateInfo2 info = {
725       .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
726       .attachmentCount = 1,
727       .pAttachments = &att,
728       .subpassCount = 1,
729       .pSubpasses = &subpass,
730       .dependencyCount = 0,
731       .pDependencies = NULL,
732    };
733 
734    return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
735                                  &info, &device->vk.alloc, pass);
736 }
737 
738 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered)739 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
740                                    VkFormat format,
741                                    VkSampleCountFlagBits samples,
742                                    uint32_t components,
743                                    bool is_layered)
744 {
745    assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
746 
747    uint64_t key = 0;
748    uint32_t bit_offset = 0;
749 
750    key |= rt_idx;
751    bit_offset += 3;
752 
753    key |= ((uint64_t) format) << bit_offset;
754    bit_offset += 32;
755 
756    key |= ((uint64_t) samples) << bit_offset;
757    bit_offset += 4;
758 
759    key |= ((uint64_t) components) << bit_offset;
760    bit_offset += 4;
761 
762    key |= (is_layered ? 1ull : 0ull) << bit_offset;
763    bit_offset += 1;
764 
765    assert(bit_offset <= 64);
766    return key;
767 }
768 
769 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered)770 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
771                                    VkFormat format,
772                                    uint32_t samples,
773                                    bool is_layered)
774 {
775    uint64_t key = 0;
776    uint32_t bit_offset = 0;
777 
778    key |= format;
779    bit_offset += 32;
780 
781    key |= ((uint64_t) samples) << bit_offset;
782    bit_offset += 4;
783 
784    const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
785    key |= ((uint64_t) has_depth) << bit_offset;
786    bit_offset++;
787 
788    const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
789    key |= ((uint64_t) has_stencil) << bit_offset;
790    bit_offset++;;
791 
792    key |= (is_layered ? 1ull : 0ull) << bit_offset;
793    bit_offset += 1;
794 
795    assert(bit_offset <= 64);
796    return key;
797 }
798 
799 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,struct v3dv_meta_color_clear_pipeline ** pipeline)800 get_color_clear_pipeline(struct v3dv_device *device,
801                          struct v3dv_render_pass *pass,
802                          uint32_t subpass_idx,
803                          uint32_t rt_idx,
804                          uint32_t attachment_idx,
805                          VkFormat format,
806                          VkSampleCountFlagBits samples,
807                          uint32_t components,
808                          bool is_layered,
809                          struct v3dv_meta_color_clear_pipeline **pipeline)
810 {
811    assert(vk_format_is_color(format));
812 
813    VkResult result = VK_SUCCESS;
814 
815    /* If pass != NULL it means that we are emitting the clear as a draw call
816     * in the current pass bound by the application. In that case, we can't
817     * cache the pipeline, since it will be referencing that pass and the
818     * application could be destroying it at any point. Hopefully, the perf
819     * impact is not too big since we still have the device pipeline cache
820     * around and we won't end up re-compiling the clear shader.
821     *
822     * FIXME: alternatively, we could refcount (or maybe clone) the render pass
823     * provided by the application and include it in the pipeline key setup
824     * to make caching safe in this scenario, however, based on tests with
825     * vkQuake3, the fact that we are not caching here doesn't seem to have
826     * any significant impact in performance, so it might not be worth it.
827     */
828    const bool can_cache_pipeline = (pass == NULL);
829 
830    uint64_t key;
831    if (can_cache_pipeline) {
832       key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
833                                                components, is_layered);
834       mtx_lock(&device->meta.mtx);
835       struct hash_entry *entry =
836          _mesa_hash_table_search(device->meta.color_clear.cache, &key);
837       if (entry) {
838          mtx_unlock(&device->meta.mtx);
839          *pipeline = entry->data;
840          return VK_SUCCESS;
841       }
842    }
843 
844    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
845                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
846 
847    if (*pipeline == NULL) {
848       result = VK_ERROR_OUT_OF_HOST_MEMORY;
849       goto fail;
850    }
851 
852    if (!pass) {
853       result = create_color_clear_render_pass(device,
854                                               rt_idx,
855                                               format,
856                                               samples,
857                                               &(*pipeline)->pass);
858       if (result != VK_SUCCESS)
859          goto fail;
860 
861       pass = v3dv_render_pass_from_handle((*pipeline)->pass);
862    } else {
863       (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
864    }
865 
866    result = create_color_clear_pipeline(device,
867                                         pass,
868                                         subpass_idx,
869                                         rt_idx,
870                                         format,
871                                         samples,
872                                         components,
873                                         is_layered,
874                                         device->meta.color_clear.p_layout,
875                                         &(*pipeline)->pipeline);
876    if (result != VK_SUCCESS)
877       goto fail;
878 
879    if (can_cache_pipeline) {
880       (*pipeline)->key = key;
881       (*pipeline)->cached = true;
882       _mesa_hash_table_insert(device->meta.color_clear.cache,
883                               &(*pipeline)->key, *pipeline);
884 
885       mtx_unlock(&device->meta.mtx);
886    }
887 
888    return VK_SUCCESS;
889 
890 fail:
891    if (can_cache_pipeline)
892       mtx_unlock(&device->meta.mtx);
893 
894    VkDevice _device = v3dv_device_to_handle(device);
895    if (*pipeline) {
896       if ((*pipeline)->cached)
897          v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
898       if ((*pipeline)->pipeline)
899          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
900       vk_free(&device->vk.alloc, *pipeline);
901       *pipeline = NULL;
902    }
903 
904    return result;
905 }
906 
907 static VkResult
get_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,struct v3dv_meta_depth_clear_pipeline ** pipeline)908 get_depth_clear_pipeline(struct v3dv_device *device,
909                          VkImageAspectFlags aspects,
910                          struct v3dv_render_pass *pass,
911                          uint32_t subpass_idx,
912                          uint32_t attachment_idx,
913                          bool is_layered,
914                          struct v3dv_meta_depth_clear_pipeline **pipeline)
915 {
916    assert(subpass_idx < pass->subpass_count);
917    assert(attachment_idx != VK_ATTACHMENT_UNUSED);
918    assert(attachment_idx < pass->attachment_count);
919 
920    VkResult result = VK_SUCCESS;
921 
922    const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
923    const VkFormat format = pass->attachments[attachment_idx].desc.format;
924    assert(vk_format_is_depth_or_stencil(format));
925 
926    const uint64_t key =
927       get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);
928    mtx_lock(&device->meta.mtx);
929    struct hash_entry *entry =
930       _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
931    if (entry) {
932       mtx_unlock(&device->meta.mtx);
933       *pipeline = entry->data;
934       return VK_SUCCESS;
935    }
936 
937    *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
938                           VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
939 
940    if (*pipeline == NULL) {
941       result = VK_ERROR_OUT_OF_HOST_MEMORY;
942       goto fail;
943    }
944 
945    result = create_depth_clear_pipeline(device,
946                                         aspects,
947                                         pass,
948                                         subpass_idx,
949                                         samples,
950                                         is_layered,
951                                         device->meta.depth_clear.p_layout,
952                                         &(*pipeline)->pipeline);
953    if (result != VK_SUCCESS)
954       goto fail;
955 
956    (*pipeline)->key = key;
957    _mesa_hash_table_insert(device->meta.depth_clear.cache,
958                            &(*pipeline)->key, *pipeline);
959 
960    mtx_unlock(&device->meta.mtx);
961    return VK_SUCCESS;
962 
963 fail:
964    mtx_unlock(&device->meta.mtx);
965 
966    VkDevice _device = v3dv_device_to_handle(device);
967    if (*pipeline) {
968       if ((*pipeline)->pipeline)
969          v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
970       vk_free(&device->vk.alloc, *pipeline);
971       *pipeline = NULL;
972    }
973 
974    return result;
975 }
976 
977 /* Emits a scissored quad in the clear color */
978 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)979 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
980                                struct v3dv_render_pass *pass,
981                                struct v3dv_subpass *subpass,
982                                uint32_t rt_idx,
983                                const VkClearColorValue *clear_color,
984                                bool is_layered,
985                                bool all_rects_same_layers,
986                                uint32_t rect_count,
987                                const VkClearRect *rects)
988 {
989    /* Skip if attachment is unused in the current subpass */
990    assert(rt_idx < subpass->color_count);
991    const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
992    if (attachment_idx == VK_ATTACHMENT_UNUSED)
993       return;
994 
995    /* Obtain a pipeline for this clear */
996    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
997    const VkFormat format =
998       cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
999    const VkSampleCountFlagBits samples =
1000       cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1001    const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1002                                VK_COLOR_COMPONENT_G_BIT |
1003                                VK_COLOR_COMPONENT_B_BIT |
1004                                VK_COLOR_COMPONENT_A_BIT;
1005    struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1006    VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1007                                               pass,
1008                                               cmd_buffer->state.subpass_idx,
1009                                               rt_idx,
1010                                               attachment_idx,
1011                                               format,
1012                                               samples,
1013                                               components,
1014                                               is_layered,
1015                                               &pipeline);
1016    if (result != VK_SUCCESS) {
1017       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1018          v3dv_flag_oom(cmd_buffer, NULL);
1019       return;
1020    }
1021    assert(pipeline && pipeline->pipeline);
1022 
1023    /* Emit clear rects */
1024    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1025 
1026    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1027    v3dv_CmdPushConstants(cmd_buffer_handle,
1028                          cmd_buffer->device->meta.depth_clear.p_layout,
1029                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1030                          clear_color->float32);
1031 
1032    v3dv_CmdBindPipeline(cmd_buffer_handle,
1033                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1034                         pipeline->pipeline);
1035 
1036    for (uint32_t i = 0; i < rect_count; i++) {
1037       const VkViewport viewport = {
1038          .x = rects[i].rect.offset.x,
1039          .y = rects[i].rect.offset.y,
1040          .width = rects[i].rect.extent.width,
1041          .height = rects[i].rect.extent.height,
1042          .minDepth = 0.0f,
1043          .maxDepth = 1.0f
1044       };
1045       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1046       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1047 
1048       if (is_layered) {
1049          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1050               layer_offset++) {
1051             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1052             v3dv_CmdPushConstants(cmd_buffer_handle,
1053                                   cmd_buffer->device->meta.depth_clear.p_layout,
1054                                   VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
1055             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1056          }
1057       } else {
1058          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1059          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1060       }
1061    }
1062 
1063    /* Subpass pipelines can't be cached because they include a reference to the
1064     * render pass currently bound by the application, which means that we need
1065     * to destroy them manually here.
1066     */
1067    assert(!pipeline->cached);
1068    v3dv_cmd_buffer_add_private_obj(
1069       cmd_buffer, (uintptr_t)pipeline,
1070       (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1071 
1072    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1073 }
1074 
1075 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1076  * and the stencil aspect by using stencil testing.
1077  */
1078 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,bool is_layered,bool all_rects_same_layers,uint32_t rect_count,const VkClearRect * rects)1079 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1080                             struct v3dv_render_pass *pass,
1081                             struct v3dv_subpass *subpass,
1082                             VkImageAspectFlags aspects,
1083                             const VkClearDepthStencilValue *clear_ds,
1084                             bool is_layered,
1085                             bool all_rects_same_layers,
1086                             uint32_t rect_count,
1087                             const VkClearRect *rects)
1088 {
1089    /* Skip if attachment is unused in the current subpass */
1090    const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1091    if (attachment_idx == VK_ATTACHMENT_UNUSED)
1092       return;
1093 
1094    /* Obtain a pipeline for this clear */
1095    assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1096    struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1097    VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1098                                               aspects,
1099                                               pass,
1100                                               cmd_buffer->state.subpass_idx,
1101                                               attachment_idx,
1102                                               is_layered,
1103                                               &pipeline);
1104    if (result != VK_SUCCESS) {
1105       if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1106          v3dv_flag_oom(cmd_buffer, NULL);
1107       return;
1108    }
1109    assert(pipeline && pipeline->pipeline);
1110 
1111    /* Emit clear rects */
1112    v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1113 
1114    VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1115    v3dv_CmdPushConstants(cmd_buffer_handle,
1116                          cmd_buffer->device->meta.depth_clear.p_layout,
1117                          VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1118                          &clear_ds->depth);
1119 
1120    v3dv_CmdBindPipeline(cmd_buffer_handle,
1121                         VK_PIPELINE_BIND_POINT_GRAPHICS,
1122                         pipeline->pipeline);
1123 
1124    if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1125       v3dv_CmdSetStencilReference(cmd_buffer_handle,
1126                                   VK_STENCIL_FACE_FRONT_AND_BACK,
1127                                   clear_ds->stencil);
1128       v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1129                                   VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1130       v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1131                                     VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1132    }
1133 
1134    for (uint32_t i = 0; i < rect_count; i++) {
1135       const VkViewport viewport = {
1136          .x = rects[i].rect.offset.x,
1137          .y = rects[i].rect.offset.y,
1138          .width = rects[i].rect.extent.width,
1139          .height = rects[i].rect.extent.height,
1140          .minDepth = 0.0f,
1141          .maxDepth = 1.0f
1142       };
1143       v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1144       v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1145       if (is_layered) {
1146          for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
1147               layer_offset++) {
1148             uint32_t layer = rects[i].baseArrayLayer + layer_offset;
1149             v3dv_CmdPushConstants(cmd_buffer_handle,
1150                                   cmd_buffer->device->meta.depth_clear.p_layout,
1151                                   VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
1152             v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1153          }
1154       } else {
1155          assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1156          v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1157       }
1158    }
1159 
1160    v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
1161 }
1162 
1163 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1164 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1165                      bool *is_layered, bool *all_rects_same_layers)
1166 {
1167    *all_rects_same_layers = true;
1168 
1169    uint32_t min_layer = rects[0].baseArrayLayer;
1170    uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1171    for (uint32_t i = 1; i < rect_count; i++) {
1172       if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1173           rects[i].layerCount != rects[i - 1].layerCount) {
1174          *all_rects_same_layers = false;
1175          min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1176          max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1177                                      rects[i].layerCount - 1);
1178       }
1179    }
1180 
1181    *is_layered = !(min_layer == 0 && max_layer == 0);
1182 }
1183 
1184 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1185 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1186                          uint32_t attachmentCount,
1187                          const VkClearAttachment *pAttachments,
1188                          uint32_t rectCount,
1189                          const VkClearRect *pRects)
1190 {
1191    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1192 
1193    /* We can have at most max_color_RTs + 1 D/S attachments */
1194    assert(attachmentCount <=
1195           V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
1196 
1197    /* We can only clear attachments in the current subpass */
1198    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1199 
1200    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1201    struct v3dv_subpass *subpass =
1202       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1203 
1204    /* Emit a clear rect inside the current job for this subpass. For layered
1205     * framebuffers, we use a geometry shader to redirect clears to the
1206     * appropriate layers.
1207     */
1208 
1209    v3dv_cmd_buffer_pause_occlusion_query(cmd_buffer);
1210 
1211    bool is_layered, all_rects_same_layers;
1212    gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1213    for (uint32_t i = 0; i < attachmentCount; i++) {
1214       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1215          emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1216                                         pAttachments[i].colorAttachment,
1217                                         &pAttachments[i].clearValue.color,
1218                                         is_layered, all_rects_same_layers,
1219                                         rectCount, pRects);
1220       } else {
1221          emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1222                                      pAttachments[i].aspectMask,
1223                                      &pAttachments[i].clearValue.depthStencil,
1224                                      is_layered, all_rects_same_layers,
1225                                      rectCount, pRects);
1226       }
1227    }
1228 
1229    v3dv_cmd_buffer_resume_occlusion_query(cmd_buffer);
1230 }
1231