/*
 * Copyright © 2020 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"

#include "broadcom/cle/v3dx_pack.h"
#include "compiler/nir/nir_builder.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"

static void
destroy_color_clear_pipeline(VkDevice _device,
                             uint64_t pipeline,
                             VkAllocationCallbacks *alloc)
{
   struct v3dv_meta_color_clear_pipeline *p =
      (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
   v3dv_DestroyPipeline(_device, p->pipeline, alloc);
   if (p->cached)
      v3dv_DestroyRenderPass(_device, p->pass, alloc);
   vk_free(alloc, p);
}

static void
destroy_depth_clear_pipeline(VkDevice _device,
                             struct v3dv_meta_depth_clear_pipeline *p,
                             VkAllocationCallbacks *alloc)
{
   v3dv_DestroyPipeline(_device, p->pipeline, alloc);
   vk_free(alloc, p);
}

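/* Initializes the meta clear state: one pipeline cache for color clears and
 * one for depth/stencil clears, both keyed by a packed 64-bit key (see the
 * get_*_clear_pipeline_cache_key helpers below).
 */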
void
v3dv_meta_clear_init(struct v3dv_device *device)
{
   device->meta.color_clear.cache =
      _mesa_hash_table_create(NULL, u64_hash, u64_compare);

   device->meta.depth_clear.cache =
      _mesa_hash_table_create(NULL, u64_hash, u64_compare);
}

void
v3dv_meta_clear_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   hash_table_foreach(device->meta.color_clear.cache, entry) {
      struct v3dv_meta_color_clear_pipeline *item = entry->data;
      destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->alloc);
   }
   _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);

   if (device->meta.color_clear.playout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.playout,
                                 &device->alloc);
   }

   hash_table_foreach(device->meta.depth_clear.cache, entry) {
      struct v3dv_meta_depth_clear_pipeline *item = entry->data;
      destroy_depth_clear_pipeline(_device, item, &device->alloc);
   }
   _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);

   if (device->meta.depth_clear.playout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.playout,
                                 &device->alloc);
   }
}

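/* Builds the clip-space position for a full-screen quad directly from the
 * vertex index, so the clear draws can be emitted as a 4-vertex triangle
 * strip without any vertex buffers.
 */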
static nir_ssa_def *
gen_rect_vertices(nir_builder *b)
{
   nir_intrinsic_instr *vertex_id =
      nir_intrinsic_instr_create(b->shader,
                                 nir_intrinsic_load_vertex_id);
   nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
   nir_builder_instr_insert(b, &vertex_id->instr);


   /* vertex 0: -1.0, -1.0
    * vertex 1: -1.0,  1.0
    * vertex 2:  1.0, -1.0
    * vertex 3:  1.0,  1.0
    *
    * so:
    *
    * channel 0 is vertex_id < 2 ? -1.0 :  1.0
    * channel 1 is vertex_id & 1 ?  1.0 : -1.0
    */

   nir_ssa_def *one = nir_imm_int(b, 1);
   nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
   nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);

   nir_ssa_def *comp[4];
   comp[0] = nir_bcsel(b, c0cmp,
                       nir_imm_float(b, -1.0f),
                       nir_imm_float(b, 1.0f));

   comp[1] = nir_bcsel(b, c1cmp,
                       nir_imm_float(b, 1.0f),
                       nir_imm_float(b, -1.0f));
   comp[2] = nir_imm_float(b, 0.0f);
   comp[3] = nir_imm_float(b, 1.0f);
   return nir_vec(b, comp, 4);
}

static nir_shader *
get_clear_rect_vs()
{
   nir_builder b;
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
   b.shader->info.name = ralloc_strdup(b.shader, "meta clear vs");

   const struct glsl_type *vec4 = glsl_vec4_type();
   nir_variable *vs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_ssa_def *pos = gen_rect_vertices(&b);
   nir_store_var(&b, vs_out_pos, pos, 0xf);

   return b.shader;
}

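/* Builds a fragment shader that writes the clear color, provided through a
 * 16-byte push constant, to the given render target. The output type is
 * float or uint depending on the render target format.
 */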
static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
{
   nir_builder b;
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, "meta clear fs");

   enum pipe_format pformat = vk_format_to_pipe_format(format);
   const struct glsl_type *fs_out_type =
      util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();

   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;

   nir_intrinsic_instr *color_load =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   nir_intrinsic_set_base(color_load, 0);
   nir_intrinsic_set_range(color_load, 16);
   color_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   color_load->num_components = 4;
   nir_ssa_dest_init(&color_load->instr, &color_load->dest, 4, 32, "clear color");
   nir_builder_instr_insert(&b, &color_load->instr);

   nir_store_var(&b, fs_out_color, &color_load->dest.ssa, 0xf);

   return b.shader;
}

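/* Builds a fragment shader that writes the clear depth value, provided
 * through a 4-byte push constant, to gl_FragDepth. Stencil clears don't need
 * a fragment output: they are handled with stencil test state instead.
 */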
static nir_shader *
get_depth_clear_rect_fs()
{
   nir_builder b;
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
   b.shader->info.name = ralloc_strdup(b.shader, "meta depth clear fs");

   nir_variable *fs_out_depth =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_depth");
   fs_out_depth->data.location = FRAG_RESULT_DEPTH;

   nir_intrinsic_instr *depth_load =
      nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
   nir_intrinsic_set_base(depth_load, 0);
   nir_intrinsic_set_range(depth_load, 4);
   depth_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
   depth_load->num_components = 1;
   nir_ssa_dest_init(&depth_load->instr, &depth_load->dest, 1, 32,
                     "clear depth value");
   nir_builder_instr_insert(&b, &depth_load->instr);

   nir_store_var(&b, fs_out_depth, &depth_load->dest.ssa, 0x1);

   return b.shader;
}

static VkResult
create_color_clear_pipeline_layout(struct v3dv_device *device,
                                   VkPipelineLayout *pipeline_layout)
{
   VkPipelineLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
   };

   return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                    &info, &device->alloc, pipeline_layout);
}

static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device *device,
                                   VkPipelineLayout *pipeline_layout)
{
   VkPipelineLayoutCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 0,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
   };

   return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                    &info, &device->alloc, pipeline_layout);
}

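/* Common helper to build the graphics pipelines used by the meta clear
 * paths: a full-screen triangle strip drawn with the given shaders and
 * fixed-function state, declaring the state the clear draws set at record
 * time (viewport, scissor, stencil values, ...) as dynamic.
 */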
static VkResult
create_pipeline(struct v3dv_device *device,
                struct v3dv_render_pass *pass,
                uint32_t subpass_idx,
                uint32_t samples,
                struct nir_shader *vs_nir,
                struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineLayout layout,
                VkPipeline *pipeline)
{
   struct v3dv_shader_module vs_m;
   struct v3dv_shader_module fs_m;

   v3dv_shader_module_internal_init(&vs_m, vs_nir);
   v3dv_shader_module_internal_init(&fs_m, fs_nir);

   VkPipelineShaderStageCreateInfo stages[2] = {
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_VERTEX_BIT,
         .module = v3dv_shader_module_to_handle(&vs_m),
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
         .module = v3dv_shader_module_to_handle(&fs_m),
         .pName = "main",
      },
   };

   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,

      .stageCount = fs_nir ? 2 : 1,
      .pStages = stages,

      .pVertexInputState = vi_state,

      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
         .primitiveRestartEnable = false,
      },

      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
         .viewportCount = 1,
         .scissorCount = 1,
      },

      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .rasterizerDiscardEnable = false,
         .polygonMode = VK_POLYGON_MODE_FILL,
         .cullMode = VK_CULL_MODE_NONE,
         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         .depthBiasEnable = false,
      },

      .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
         .rasterizationSamples = samples,
         .sampleShadingEnable = false,
         .pSampleMask = NULL,
         .alphaToCoverageEnable = false,
         .alphaToOneEnable = false,
      },

      .pDepthStencilState = ds_state,

      .pColorBlendState = cb_state,

      /* The meta clear pipeline declares all state as dynamic.
       * As a consequence, vkCmdBindPipeline writes no dynamic state
       * to the cmd buffer. Therefore, at the end of the meta clear,
       * we need only restore dynamic state that was vkCmdSet.
       */
      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .dynamicStateCount = 6,
         .pDynamicStates = (VkDynamicState[]) {
            VK_DYNAMIC_STATE_VIEWPORT,
            VK_DYNAMIC_STATE_SCISSOR,
            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
            VK_DYNAMIC_STATE_DEPTH_BIAS,
            VK_DYNAMIC_STATE_LINE_WIDTH,
         },
      },

      .flags = 0,
      .layout = layout,
      .renderPass = v3dv_render_pass_to_handle(pass),
      .subpass = subpass_idx,
   };

   VkResult result =
      v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
                                   VK_NULL_HANDLE,
                                   1, &info,
                                   &device->alloc,
                                   pipeline);

   ralloc_free(vs_nir);
   ralloc_free(fs_nir);

   return result;
}

static VkResult
create_color_clear_pipeline(struct v3dv_device *device,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t rt_idx,
                            VkFormat format,
                            uint32_t samples,
                            uint32_t components,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);

   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = false,
      .depthWriteEnable = false,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = false,
   };

   assert(subpass_idx < pass->subpass_count);
   const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
   assert(rt_idx < color_count);

   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
   for (uint32_t i = 0; i < color_count; i++) {
      blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
         .blendEnable = false,
         .colorWriteMask = i == rt_idx ? components : 0,
      };
   }

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = color_count,
      .pAttachments = blend_att_state
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}

static VkResult
create_depth_clear_pipeline(struct v3dv_device *device,
                            VkImageAspectFlags aspects,
                            struct v3dv_render_pass *pass,
                            uint32_t subpass_idx,
                            uint32_t samples,
                            VkPipelineLayout pipeline_layout,
                            VkPipeline *pipeline)
{
   const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
   const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
   assert(has_depth || has_stencil);

   nir_shader *vs_nir = get_clear_rect_vs();
   nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;

   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   const VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
      .depthTestEnable = has_depth,
      .depthWriteEnable = has_depth,
      .depthCompareOp = VK_COMPARE_OP_ALWAYS,
      .depthBoundsTestEnable = false,
      .stencilTestEnable = has_stencil,
      .front = {
         .passOp = VK_STENCIL_OP_REPLACE,
         .compareOp = VK_COMPARE_OP_ALWAYS,
         /* compareMask, writeMask and reference are dynamic state */
      },
      .back = { 0 },
   };

   assert(subpass_idx < pass->subpass_count);
   VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = pass->subpasses[subpass_idx].color_count,
      .pAttachments = blend_att_state,
   };

   return create_pipeline(device,
                          pass, subpass_idx,
                          samples,
                          vs_nir, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          pipeline_layout,
                          pipeline);
}

static VkResult
create_color_clear_render_pass(struct v3dv_device *device,
                               uint32_t rt_idx,
                               VkFormat format,
                               uint32_t samples,
                               VkRenderPass *pass)
{
   VkAttachmentDescription att = {
      .format = format,
      .samples = samples,
      .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
      .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
      .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkAttachmentReference att_ref = {
      .attachment = rt_idx,
      .layout = VK_IMAGE_LAYOUT_GENERAL,
   };

   VkSubpassDescription subpass = {
      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
      .inputAttachmentCount = 0,
      .colorAttachmentCount = 1,
      .pColorAttachments = &att_ref,
      .pResolveAttachments = NULL,
      .pDepthStencilAttachment = NULL,
      .preserveAttachmentCount = 0,
      .pPreserveAttachments = NULL,
   };

   VkRenderPassCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
      .attachmentCount = 1,
      .pAttachments = &att,
      .subpassCount = 1,
      .pSubpasses = &subpass,
      .dependencyCount = 0,
      .pDependencies = NULL,
   };

   return v3dv_CreateRenderPass(v3dv_device_to_handle(device),
                                &info, &device->alloc, pass);
}

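/* Packs the parameters that make a color clear pipeline unique into a 64-bit
 * cache key: bits 0-1 hold the render target index, bits 2-33 the VkFormat,
 * bits 34-37 the sample count and bits 38-41 the color write mask.
 */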
static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,
                                   VkFormat format,
                                   uint32_t samples,
                                   uint32_t components)
{
   assert(rt_idx < V3D_MAX_DRAW_BUFFERS);

   uint64_t key = 0;
   uint32_t bit_offset = 0;

   key |= rt_idx;
   bit_offset += 2;

   key |= ((uint64_t) format) << bit_offset;
   bit_offset += 32;

   key |= ((uint64_t) samples) << bit_offset;
   bit_offset += 4;

   key |= ((uint64_t) components) << bit_offset;
   bit_offset += 4;

   assert(bit_offset <= 64);
   return key;
}

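/* Same idea for depth/stencil clear pipelines: bits 0-31 hold the VkFormat,
 * bits 32-35 the sample count, bit 36 whether the depth aspect is cleared
 * and bit 37 whether the stencil aspect is cleared.
 */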
static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
                                   VkFormat format,
                                   uint32_t samples)
{
   uint64_t key = 0;
   uint32_t bit_offset = 0;

   key |= format;
   bit_offset += 32;

   key |= ((uint64_t) samples) << bit_offset;
   bit_offset += 4;

   const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
   key |= ((uint64_t) has_depth) << bit_offset;
   bit_offset++;

   const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
   key |= ((uint64_t) has_stencil) << bit_offset;
   bit_offset++;

   assert(bit_offset <= 64);
   return key;
}

static VkResult
get_color_clear_pipeline(struct v3dv_device *device,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t rt_idx,
                         uint32_t attachment_idx,
                         VkFormat format,
                         uint32_t samples,
                         uint32_t components,
                         struct v3dv_meta_color_clear_pipeline **pipeline)
{
   assert(vk_format_is_color(format));

   VkResult result = VK_SUCCESS;

   mtx_lock(&device->meta.mtx);
   if (!device->meta.color_clear.playout) {
      result =
         create_color_clear_pipeline_layout(device,
                                            &device->meta.color_clear.playout);
   }
   mtx_unlock(&device->meta.mtx);
   if (result != VK_SUCCESS)
      return result;

   /* If pass != NULL it means that we are emitting the clear as a draw call
    * in the current pass bound by the application. In that case, we can't
    * cache the pipeline, since it will be referencing that pass and the
    * application could be destroying it at any point. Hopefully, the perf
    * impact is not too big since we still have the device pipeline cache
    * around and we won't end up re-compiling the clear shader.
    *
    * FIXME: alternatively, we could refcount (or maybe clone) the render pass
    * provided by the application and include it in the pipeline key setup
    * to make caching safe in this scenario, however, based on tests with
    * vkQuake3, the fact that we are not caching here doesn't seem to have
    * any significant impact in performance, so it might not be worth it.
    */
   const bool can_cache_pipeline = (pass == NULL);

   uint64_t key;
   if (can_cache_pipeline) {
      key =
         get_color_clear_pipeline_cache_key(rt_idx, format, samples, components);
      mtx_lock(&device->meta.mtx);
      struct hash_entry *entry =
         _mesa_hash_table_search(device->meta.color_clear.cache, &key);
      if (entry) {
         mtx_unlock(&device->meta.mtx);
         *pipeline = entry->data;
         return VK_SUCCESS;
      }
   }

   *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   if (!pass) {
      result = create_color_clear_render_pass(device,
                                              rt_idx,
                                              format,
                                              samples,
                                              &(*pipeline)->pass);
      if (result != VK_SUCCESS)
         goto fail;

      pass = v3dv_render_pass_from_handle((*pipeline)->pass);
   } else {
      (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
   }

   result = create_color_clear_pipeline(device,
                                        pass,
                                        subpass_idx,
                                        rt_idx,
                                        format,
                                        samples,
                                        components,
                                        device->meta.color_clear.playout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   if (can_cache_pipeline) {
      (*pipeline)->key = key;
      (*pipeline)->cached = true;
      _mesa_hash_table_insert(device->meta.color_clear.cache,
                              &(*pipeline)->key, *pipeline);

      mtx_unlock(&device->meta.mtx);
   }

   return VK_SUCCESS;

fail:
   if (can_cache_pipeline)
      mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->cached)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
      vk_free(&device->alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}

static VkResult
get_depth_clear_pipeline(struct v3dv_device *device,
                         VkImageAspectFlags aspects,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t attachment_idx,
                         struct v3dv_meta_depth_clear_pipeline **pipeline)
{
   assert(subpass_idx < pass->subpass_count);
   assert(attachment_idx != VK_ATTACHMENT_UNUSED);
   assert(attachment_idx < pass->attachment_count);

   VkResult result = VK_SUCCESS;

   mtx_lock(&device->meta.mtx);
   if (!device->meta.depth_clear.playout) {
      result =
         create_depth_clear_pipeline_layout(device,
                                            &device->meta.depth_clear.playout);
   }
   mtx_unlock(&device->meta.mtx);
   if (result != VK_SUCCESS)
      return result;

   const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
   const VkFormat format = pass->attachments[attachment_idx].desc.format;
   assert(vk_format_is_depth_or_stencil(format));

   const uint64_t key =
      get_depth_clear_pipeline_cache_key(aspects, format, samples);
   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
   if (entry) {
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return VK_SUCCESS;
   }

   *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   result = create_depth_clear_pipeline(device,
                                        aspects,
                                        pass,
                                        subpass_idx,
                                        samples,
                                        device->meta.depth_clear.playout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   (*pipeline)->key = key;
   _mesa_hash_table_insert(device->meta.depth_clear.cache,
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return VK_SUCCESS;

fail:
   mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
      vk_free(&device->alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}

static VkFormat
get_color_format_for_depth_stencil_format(VkFormat format)
{
   /* For single depth/stencil aspect formats, we just choose a compatible
    * 1 channel format, but for combined depth/stencil we want an RGBA format
    * so we can specify the channels we want to write.
    */
   switch (format) {
   case VK_FORMAT_D16_UNORM:
      return VK_FORMAT_R16_UINT;
   case VK_FORMAT_D32_SFLOAT:
      return VK_FORMAT_R32_SFLOAT;
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return VK_FORMAT_R8G8B8A8_UINT;
   default:
      unreachable("Unsupported depth/stencil format");
   }
}

/**
 * Emits a scissored quad in the clear color. Unlike the subpass versions,
 * this creates its own framebuffer setup with a single color attachment,
 * and therefore spawns new jobs, making it much slower than the subpass
 * version.
 *
 * This path is only used when we have clears on layers other than the
 * base layer in a framebuffer attachment, since we don't currently
 * support any form of layered rendering that would allow us to implement
 * this in the subpass version.
 *
 * Notice this can also handle depth/stencil formats by rendering to the
 * depth/stencil target using a compatible color format.
 */
static void
emit_color_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
                      uint32_t attachment_idx,
                      VkFormat rt_format,
                      uint32_t rt_samples,
                      uint32_t rt_components,
                      VkClearColorValue clear_color,
                      const VkClearRect *rect)
{
   assert(cmd_buffer->state.pass);
   struct v3dv_device *device = cmd_buffer->device;
   struct v3dv_render_pass *pass = cmd_buffer->state.pass;

   assert(attachment_idx != VK_ATTACHMENT_UNUSED &&
          attachment_idx < pass->attachment_count);

   struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
   VkResult result =
      get_color_clear_pipeline(device,
                               NULL, 0, /* Not using current subpass */
                               0, attachment_idx,
                               rt_format, rt_samples, rt_components,
                               &pipeline);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
         v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }
   assert(pipeline && pipeline->pipeline && pipeline->pass);

   /* Since we are not emitting the draw call in the current subpass we should
    * be caching the clear pipeline and we don't have to take care of destroying
    * it below.
    */
   assert(pipeline->cached);

   /* Store command buffer state for the current subpass before we interrupt
    * it to emit the color clear pass and then finish the job for the
    * interrupted subpass.
    */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
   v3dv_cmd_buffer_finish_job(cmd_buffer);

   struct v3dv_framebuffer *subpass_fb =
      v3dv_framebuffer_from_handle(cmd_buffer->state.meta.framebuffer);
   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   VkDevice device_handle = v3dv_device_to_handle(cmd_buffer->device);

   /* If we are clearing a depth/stencil attachment as a color attachment
    * then we need to configure the framebuffer to the compatible color
    * format.
    */
   const struct v3dv_image_view *att_iview =
      subpass_fb->attachments[attachment_idx];
   const bool is_depth_or_stencil =
      vk_format_is_depth_or_stencil(att_iview->vk_format);

   /* Emit the pass for each attachment layer, which creates a framebuffer
    * for each selected layer of the attachment and then renders a scissored
    * quad in the clear color.
    */
   uint32_t dirty_dynamic_state = 0;
   for (uint32_t i = 0; i < rect->layerCount; i++) {
      VkImageViewCreateInfo fb_layer_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle((struct v3dv_image *)att_iview->image),
         .viewType =
            v3dv_image_type_to_view_type(att_iview->image->type),
         .format = is_depth_or_stencil ? rt_format : att_iview->vk_format,
         .subresourceRange = {
            .aspectMask = is_depth_or_stencil ? VK_IMAGE_ASPECT_COLOR_BIT :
                                                att_iview->aspects,
            .baseMipLevel = att_iview->base_level,
            .levelCount = att_iview->max_level - att_iview->base_level + 1,
            .baseArrayLayer = att_iview->first_layer + rect->baseArrayLayer + i,
            .layerCount = 1,
         },
      };
      VkImageView fb_attachment;
      result = v3dv_CreateImageView(v3dv_device_to_handle(device),
                                    &fb_layer_view_info,
                                    &device->alloc, &fb_attachment);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb_attachment,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkFramebufferCreateInfo fb_info = {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .renderPass = v3dv_render_pass_to_handle(pass),
         .attachmentCount = 1,
         .pAttachments = &fb_attachment,
         .width = subpass_fb->width,
         .height = subpass_fb->height,
         .layers = 1,
      };

      VkFramebuffer fb;
      result = v3dv_CreateFramebuffer(device_handle, &fb_info,
                                      &cmd_buffer->device->alloc, &fb);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = pipeline->pass,
         .framebuffer = fb,
         .renderArea = {
            .offset = { rect->rect.offset.x, rect->rect.offset.y },
            .extent = { rect->rect.extent.width, rect->rect.extent.height } },
         .clearValueCount = 0,
      };

      v3dv_CmdBeginRenderPass(cmd_buffer_handle, &rp_info,
                              VK_SUBPASS_CONTENTS_INLINE);

      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;
      job->is_subpass_continue = true;

      v3dv_CmdPushConstants(cmd_buffer_handle,
                            device->meta.color_clear.playout,
                            VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
                            &clear_color);

      v3dv_CmdBindPipeline(cmd_buffer_handle,
                           VK_PIPELINE_BIND_POINT_GRAPHICS,
                           pipeline->pipeline);

      const VkViewport viewport = {
         .x = rect->rect.offset.x,
         .y = rect->rect.offset.y,
         .width = rect->rect.extent.width,
         .height = rect->rect.extent.height,
         .minDepth = 0.0f,
         .maxDepth = 1.0f
      };
      v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
      v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rect->rect);

      v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);

      v3dv_CmdEndRenderPass(cmd_buffer_handle);
   }

   /* The clear pipeline sets viewport and scissor state, so we need
    * to restore it
    */
   dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
}

static void
emit_ds_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
                   VkImageAspectFlags aspects,
                   uint32_t attachment_idx,
                   VkClearDepthStencilValue clear_ds,
                   const VkClearRect *rect)
{
   assert(cmd_buffer->state.pass);
   assert(attachment_idx != VK_ATTACHMENT_UNUSED);
   assert(attachment_idx < cmd_buffer->state.pass->attachment_count);

   VkFormat format =
      cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
   assert((aspects & ~vk_format_aspects(format)) == 0);

   uint32_t samples =
      cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;

   enum pipe_format pformat = vk_format_to_pipe_format(format);
   VkClearColorValue clear_color;
   uint32_t clear_zs =
      util_pack_z_stencil(pformat, clear_ds.depth, clear_ds.stencil);

   /* We implement depth/stencil clears by turning them into color clears
    * with a compatible color format.
    */
   VkFormat color_format = get_color_format_for_depth_stencil_format(format);

   uint32_t comps;
   if (color_format == VK_FORMAT_R8G8B8A8_UINT) {
      /* We are clearing a D24 format so we need to select the channels that we
       * are being asked to clear to avoid clearing aspects that should be
       * preserved. Also, the hardware uses the MSB channels to store the D24
       * component, so we need to shift the components in the clear value to
       * match that.
       */
      comps = 0;
      if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
         comps |= VK_COLOR_COMPONENT_R_BIT;
         clear_color.uint32[0] = clear_zs >> 24;
      }
      if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
         comps |= VK_COLOR_COMPONENT_G_BIT |
                  VK_COLOR_COMPONENT_B_BIT |
                  VK_COLOR_COMPONENT_A_BIT;
         clear_color.uint32[1] = (clear_zs >>  0) & 0xff;
         clear_color.uint32[2] = (clear_zs >>  8) & 0xff;
         clear_color.uint32[3] = (clear_zs >> 16) & 0xff;
      }
   } else {
      /* For anything else we use a single component format */
      comps = VK_COLOR_COMPONENT_R_BIT;
      clear_color.uint32[0] = clear_zs;
   }

   emit_color_clear_rect(cmd_buffer, attachment_idx,
                         color_format, samples, comps,
                         clear_color, rect);
}

/* Emits a scissored quad in the clear color.
 *
 * This path only works for clears to the base layer in the framebuffer, since
 * we don't currently support any form of layered rendering.
 */
static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_render_pass *pass,
                               struct v3dv_subpass *subpass,
                               uint32_t rt_idx,
                               const VkClearColorValue *clear_color,
                               uint32_t rect_count,
                               const VkClearRect *rects)
{
   /* Skip if attachment is unused in the current subpass */
   assert(rt_idx < subpass->color_count);
   const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
   if (attachment_idx == VK_ATTACHMENT_UNUSED)
      return;

   /* Obtain a pipeline for this clear */
   assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
   const VkFormat format =
      cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
   const uint32_t samples =
      cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
   const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
                               VK_COLOR_COMPONENT_G_BIT |
                               VK_COLOR_COMPONENT_B_BIT |
                               VK_COLOR_COMPONENT_A_BIT;
   struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
   VkResult result = get_color_clear_pipeline(cmd_buffer->device,
                                              pass,
                                              cmd_buffer->state.subpass_idx,
                                              rt_idx,
                                              attachment_idx,
                                              format,
                                              samples,
                                              components,
                                              &pipeline);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
         v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }
   assert(pipeline && pipeline->pipeline);

   /* Emit clear rects */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   v3dv_CmdPushConstants(cmd_buffer_handle,
                         cmd_buffer->device->meta.color_clear.playout,
                         VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
                         clear_color->float32);

   v3dv_CmdBindPipeline(cmd_buffer_handle,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;

   for (uint32_t i = 0; i < rect_count; i++) {
      assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
      const VkViewport viewport = {
         .x = rects[i].rect.offset.x,
         .y = rects[i].rect.offset.y,
         .width = rects[i].rect.extent.width,
         .height = rects[i].rect.extent.height,
         .minDepth = 0.0f,
         .maxDepth = 1.0f
      };
      v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
      v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
      v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
   }

   /* Subpass pipelines can't be cached because they include a reference to the
    * render pass currently bound by the application, which means that we need
    * to destroy them manually here.
    */
   assert(!pipeline->cached);
   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)pipeline,
      (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);

   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
}

/* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
 * and the stencil aspect by using stencil testing.
 *
 * This path only works for clears to the base layer in the framebuffer, since
 * we don't currently support any form of layered rendering.
 */
static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
                            struct v3dv_render_pass *pass,
                            struct v3dv_subpass *subpass,
                            VkImageAspectFlags aspects,
                            const VkClearDepthStencilValue *clear_ds,
                            uint32_t rect_count,
                            const VkClearRect *rects)
{
   /* Skip if attachment is unused in the current subpass */
   const uint32_t attachment_idx = subpass->ds_attachment.attachment;
   if (attachment_idx == VK_ATTACHMENT_UNUSED)
      return;

   /* Obtain a pipeline for this clear */
   assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
   struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
   VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
                                              aspects,
                                              pass,
                                              cmd_buffer->state.subpass_idx,
                                              attachment_idx,
                                              &pipeline);
   if (result != VK_SUCCESS) {
      if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
         v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }
   assert(pipeline && pipeline->pipeline);

   /* Emit clear rects */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   v3dv_CmdPushConstants(cmd_buffer_handle,
                         cmd_buffer->device->meta.depth_clear.playout,
                         VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
                         &clear_ds->depth);

   v3dv_CmdBindPipeline(cmd_buffer_handle,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      v3dv_CmdSetStencilReference(cmd_buffer_handle,
                                  VK_STENCIL_FACE_FRONT_AND_BACK,
                                  clear_ds->stencil);
      v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
                                  VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
      v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
                                    VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
      dynamic_states |= V3DV_CMD_DIRTY_STENCIL_COMPARE_MASK |
                        V3DV_CMD_DIRTY_STENCIL_WRITE_MASK |
                        V3DV_CMD_DIRTY_STENCIL_REFERENCE;
   }

   for (uint32_t i = 0; i < rect_count; i++) {
      assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
      const VkViewport viewport = {
         .x = rects[i].rect.offset.x,
         .y = rects[i].rect.offset.y,
         .width = rects[i].rect.extent.width,
         .height = rects[i].rect.extent.height,
         .minDepth = 0.0f,
         .maxDepth = 1.0f
      };
      v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
      v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
      v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
   }

   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
}

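/* Emits the store packet that writes the cleared tile buffer contents back
 * to one layer of the given attachment, taking the image's tiling layout
 * and sample count into account.
 */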
static void
emit_tlb_clear_store(struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_cl *cl,
                     uint32_t attachment_idx,
                     uint32_t layer,
                     uint32_t buffer)
{
   const struct v3dv_image_view *iview =
      cmd_buffer->state.framebuffer->attachments[attachment_idx];
   const struct v3dv_image *image = iview->image;
   const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
   uint32_t layer_offset = v3dv_layer_offset(image,
                                             iview->base_level,
                                             iview->first_layer + layer);

   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = buffer;
      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      store.output_image_format = iview->format->rt_type;
      store.r_b_swap = iview->swap_rb;
      store.memory_format = slice->tiling;

      if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
          slice->tiling == VC5_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == VC5_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_tlb_clear_stores(struct v3dv_cmd_buffer *cmd_buffer,
                      struct v3dv_cl *cl,
                      uint32_t attachment_count,
                      const VkClearAttachment *attachments,
                      uint32_t layer)
{
   struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_subpass *subpass =
      &state->pass->subpasses[state->subpass_idx];

   bool has_stores = false;
   for (uint32_t i = 0; i < attachment_count; i++) {
      uint32_t attachment_idx;
      uint32_t buffer;
      if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                       VK_IMAGE_ASPECT_STENCIL_BIT)) {
         attachment_idx = subpass->ds_attachment.attachment;
         buffer = v3dv_zs_buffer_from_aspect_bits(attachments[i].aspectMask);
      } else {
         uint32_t rt_idx = attachments[i].colorAttachment;
         attachment_idx = subpass->color_attachments[rt_idx].attachment;
         buffer = RENDER_TARGET_0 + rt_idx;
      }

      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      has_stores = true;
      emit_tlb_clear_store(cmd_buffer, cl, attachment_idx, layer, buffer);
   }

   if (!has_stores) {
      cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
   }
}

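/* Emits the generic tile list executed for every tile: nothing is loaded,
 * the implicit tile list is processed, and then the cleared tile buffers are
 * stored to the attachments selected by the clear.
 */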
static void
emit_tlb_clear_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                            uint32_t attachment_count,
                            const VkClearAttachment *attachments,
                            uint32_t layer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(cmd_buffer, NULL);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end); /* Nothing to load */

   cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
      fmt.primitive_type = LIST_TRIANGLES;
   }

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_tlb_clear_stores(cmd_buffer, cl, attachment_count, attachments, layer);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

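/* Emits the render control list for clearing one layer: the supertile
 * configuration, a tile that clears the tile buffers plus a dummy tile for
 * the GFXH-1742 workaround, the generic tile list, and the coordinates of
 * every supertile in the frame.
 */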
1301 static void
emit_tlb_clear_layer_rcl(struct v3dv_cmd_buffer * cmd_buffer,uint32_t attachment_count,const VkClearAttachment * attachments,uint32_t layer)1302 emit_tlb_clear_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
1303                          uint32_t attachment_count,
1304                          const VkClearAttachment *attachments,
1305                          uint32_t layer)
1306 {
1307    const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
1308    const struct v3dv_framebuffer *framebuffer = state->framebuffer;
1309 
1310    struct v3dv_job *job = cmd_buffer->state.job;
1311    struct v3dv_cl *rcl = &job->rcl;
1312 
1313    const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
1314 
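   /* Each layer uses its own region of the tile allocation BO: 64 bytes per
    * tile, matching the initial tile list block size configured for the job.
    */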
1315    const uint32_t tile_alloc_offset =
1316       64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
1317    cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
1318       list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
1319    }
1320 
1321    cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
1322       config.number_of_bin_tile_lists = 1;
1323       config.total_frame_width_in_tiles = tiling->draw_tiles_x;
1324       config.total_frame_height_in_tiles = tiling->draw_tiles_y;
1325 
1326       config.supertile_width_in_tiles = tiling->supertile_width;
1327       config.supertile_height_in_tiles = tiling->supertile_height;
1328 
1329       config.total_frame_width_in_supertiles =
1330          tiling->frame_width_in_supertiles;
1331       config.total_frame_height_in_supertiles =
1332          tiling->frame_height_in_supertiles;
1333    }
1334 
1335    /* Emit the clear and also the workaround for GFXH-1742 */
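   /* The first iteration emits the actual clear of the tile buffers; the
    * second only emits a dummy store-only tile, which is the workaround.
    */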
1336    for (int i = 0; i < 2; i++) {
1337       cl_emit(rcl, TILE_COORDINATES, coords);
1338       cl_emit(rcl, END_OF_LOADS, end);
1339       cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
1340          store.buffer_to_store = NONE;
1341       }
1342       if (i == 0) {
1343          cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
1344             clear.clear_z_stencil_buffer = true;
1345             clear.clear_all_render_targets = true;
1346          }
1347       }
1348       cl_emit(rcl, END_OF_TILE_MARKER, end);
1349    }
1350 
1351    cl_emit(rcl, FLUSH_VCD_CACHE, flush);
1352 
1353    emit_tlb_clear_per_tile_rcl(cmd_buffer, attachment_count, attachments, layer);
1354 
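   /* Emit coordinates for every supertile covering the framebuffer; this is
    * what schedules the per-tile list above for each tile of this layer.
    */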
1355    uint32_t supertile_w_in_pixels =
1356       tiling->tile_width * tiling->supertile_width;
1357    uint32_t supertile_h_in_pixels =
1358       tiling->tile_height * tiling->supertile_height;
1359 
1360    const uint32_t max_render_x = framebuffer->width - 1;
1361    const uint32_t max_render_y = framebuffer->height - 1;
1362    const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
1363    const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;
1364 
1365    for (int y = 0; y <= max_y_supertile; y++) {
1366       for (int x = 0; x <= max_x_supertile; x++) {
1367          cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
1368             coords.column_number_in_supertiles = x;
1369             coords.row_number_in_supertiles = y;
1370          }
1371       }
1372    }
1373 }
1374 
1375 static void
1376 emit_tlb_clear_job(struct v3dv_cmd_buffer *cmd_buffer,
1377                    uint32_t attachment_count,
1378                    const VkClearAttachment *attachments,
1379                    uint32_t base_layer,
1380                    uint32_t layer_count)
1381 {
1382    const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
1383    const struct v3dv_framebuffer *framebuffer = state->framebuffer;
1384    const struct v3dv_subpass *subpass =
1385       &state->pass->subpasses[state->subpass_idx];
1386    struct v3dv_job *job = cmd_buffer->state.job;
1387    assert(job);
1388 
1389    /* Check how many color attachments we have and also if we have a
1390     * depth/stencil attachment.
1391     */
1392    uint32_t color_attachment_count = 0;
1393    VkClearAttachment color_attachments[4];
1394    const VkClearDepthStencilValue *ds_clear_value = NULL;
1395    uint8_t internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
1396    for (uint32_t i = 0; i < attachment_count; i++) {
1397       if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
1398                                        VK_IMAGE_ASPECT_STENCIL_BIT)) {
1399          assert(subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED);
1400          ds_clear_value = &attachments[i].clearValue.depthStencil;
1401          struct v3dv_render_pass_attachment *att =
1402             &state->pass->attachments[subpass->ds_attachment.attachment];
1403          internal_depth_type = v3dv_get_internal_depth_type(att->desc.format);
1404       } else if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1405          color_attachments[color_attachment_count++] = attachments[i];
1406       }
1407    }
1408 
1409    uint8_t internal_bpp;
1410    bool msaa;
1411    v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass,
1412                                               &internal_bpp, &msaa);
1413 
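   /* Set up the frame for this job using the framebuffer dimensions, layer
    * count and the number of color attachments involved in the clear.
    */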
1414    v3dv_job_start_frame(job,
1415                         framebuffer->width,
1416                         framebuffer->height,
1417                         framebuffer->layers,
1418                         color_attachment_count,
1419                         internal_bpp, msaa);
1420 
1421    struct v3dv_cl *rcl = &job->rcl;
1422    v3dv_cl_ensure_space_with_branch(rcl, 200 +
1423                                     layer_count * 256 *
1424                                     cl_packet_length(SUPERTILE_COORDINATES));
1425    v3dv_return_if_oom(cmd_buffer, NULL);
1426 
1427    const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
1428    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
1429       config.early_z_disable = true;
1430       config.image_width_pixels = framebuffer->width;
1431       config.image_height_pixels = framebuffer->height;
1432       config.number_of_render_targets = MAX2(color_attachment_count, 1);
1433       config.multisample_mode_4x = false; /* FIXME */
1434       config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
1435       config.internal_depth_type = internal_depth_type;
1436    }
1437 
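   /* Emit the clear color for each color attachment. The clear value is
    * split across up to three packets depending on the internal bpp of the
    * render target.
    */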
1438    for (uint32_t i = 0; i < color_attachment_count; i++) {
1439       uint32_t rt_idx = color_attachments[i].colorAttachment;
1440       uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1441       if (attachment_idx == VK_ATTACHMENT_UNUSED)
1442          continue;
1443 
1444       const struct v3dv_render_pass_attachment *attachment =
1445          &state->pass->attachments[attachment_idx];
1446 
1447       uint32_t internal_type, internal_bpp, internal_size;
1448       const struct v3dv_format *format =
1449          v3dv_get_format(attachment->desc.format);
1450       v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
1451                                                    &internal_type,
1452                                                    &internal_bpp);
1453       internal_size = 4 << internal_bpp;
1454 
1455       uint32_t clear_color[4] = { 0 };
1456       v3dv_get_hw_clear_color(&color_attachments[i].clearValue.color,
1457                               internal_type,
1458                               internal_size,
1459                               clear_color);
1460 
1461       struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
1462       const struct v3dv_image *image = iview->image;
1463       const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
1464 
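      /* UIF-tiled slices may be padded beyond the implicit height; when the
       * extra padding reaches 15 or more UIF blocks, the padded height has
       * to be given to the clear config explicitly.
       */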
1465       uint32_t clear_pad = 0;
1466       if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
1467           slice->tiling == VC5_TILING_UIF_XOR) {
1468          int uif_block_height = v3d_utile_height(image->cpp) * 2;
1469 
1470          uint32_t implicit_padded_height =
1471             align(framebuffer->height, uif_block_height) / uif_block_height;
1472 
1473          if (slice->padded_height_of_output_image_in_uif_blocks -
1474              implicit_padded_height >= 15) {
1475             clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
1476          }
1477       }
1478 
1479       cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
1480          clear.clear_color_low_32_bits = clear_color[0];
1481          clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
1482          clear.render_target_number = i;
1483       };
1484 
1485       if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
1486          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
1487             clear.clear_color_mid_low_32_bits =
1488               ((clear_color[1] >> 24) | (clear_color[2] << 8));
1489             clear.clear_color_mid_high_24_bits =
1490               ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
1491             clear.render_target_number = i;
1492          };
1493       }
1494 
1495       if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
1496          cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
1497             clear.uif_padded_height_in_uif_blocks = clear_pad;
1498             clear.clear_color_high_16_bits = clear_color[3] >> 16;
1499             clear.render_target_number = i;
1500          };
1501       }
1502    }
1503 
1504    cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
1505       v3dv_render_pass_setup_render_target(cmd_buffer, 0,
1506                                            &rt.render_target_0_internal_bpp,
1507                                            &rt.render_target_0_internal_type,
1508                                            &rt.render_target_0_clamp);
1509       v3dv_render_pass_setup_render_target(cmd_buffer, 1,
1510                                            &rt.render_target_1_internal_bpp,
1511                                            &rt.render_target_1_internal_type,
1512                                            &rt.render_target_1_clamp);
1513       v3dv_render_pass_setup_render_target(cmd_buffer, 2,
1514                                            &rt.render_target_2_internal_bpp,
1515                                            &rt.render_target_2_internal_type,
1516                                            &rt.render_target_2_clamp);
1517       v3dv_render_pass_setup_render_target(cmd_buffer, 3,
1518                                            &rt.render_target_3_internal_bpp,
1519                                            &rt.render_target_3_internal_type,
1520                                            &rt.render_target_3_clamp);
1521    }
1522 
1523    cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
1524       clear.z_clear_value = ds_clear_value ? ds_clear_value->depth : 1.0f;
1525       clear.stencil_clear_value = ds_clear_value ? ds_clear_value->stencil : 0;
1526    };
1527 
1528    cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
1529       init.use_auto_chained_tile_lists = true;
1530       init.size_of_first_block_in_chained_tile_lists =
1531          TILE_ALLOCATION_BLOCK_SIZE_64B;
1532    }
1533 
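   /* Emit one render control list section per layer involved in the clear. */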
1534    for (int layer = base_layer; layer < base_layer + layer_count; layer++) {
1535       emit_tlb_clear_layer_rcl(cmd_buffer,
1536                                attachment_count,
1537                                attachments,
1538                                layer);
1539    }
1540 
1541    cl_emit(rcl, END_OF_RENDERING, end);
1542 }
1543 
1544 static void
1545 emit_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
1546                uint32_t attachment_count,
1547                const VkClearAttachment *attachments,
1548                uint32_t base_layer,
1549                uint32_t layer_count)
1550 {
1551    struct v3dv_job *job =
1552       v3dv_cmd_buffer_start_job(cmd_buffer, cmd_buffer->state.subpass_idx,
1553                                 V3DV_JOB_TYPE_GPU_CL);
1554 
1555    /* vkCmdClearAttachments runs inside a render pass */
1556    job->is_subpass_continue = true;
1557 
1558    emit_tlb_clear_job(cmd_buffer,
1559                       attachment_count,
1560                       attachments,
1561                       base_layer, layer_count);
1562 
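   /* The clear was emitted as its own job, so resume the subpass in a new
    * job to keep recording any commands that follow.
    */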
1563    v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
1564 }
1565 
1566 static bool
1567 is_subrect(const VkRect2D *r0, const VkRect2D *r1)
1568 {
1569    return r0->offset.x <= r1->offset.x &&
1570           r0->offset.y <= r1->offset.y &&
1571           r0->offset.x + r0->extent.width >= r1->offset.x + r1->extent.width &&
1572           r0->offset.y + r0->extent.height >= r1->offset.y + r1->extent.height;
1573 }
1574 
1575 static bool
1576 can_use_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
1577                   uint32_t rect_count,
1578                   const VkClearRect *rects)
1579 {
1580    const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
1581 
1582    const VkRect2D *render_area = &cmd_buffer->state.render_area;
1583 
1584    /* Check if we are clearing a single region covering the entire framebuffer
1585     * and that we are not constrained by the current render area.
1586     *
1587     * From the Vulkan 1.0 spec:
1588     *
1589     *   "The vkCmdClearAttachments command is not affected by the bound
1590     *    pipeline state."
1591     *
1592     * So we can ignore scissor and viewport state for this check.
1593     */
1594    const VkRect2D fb_rect = {
1595       { 0, 0 },
1596       { framebuffer->width, framebuffer->height }
1597    };
1598 
1599    return rect_count == 1 &&
1600           is_subrect(&rects[0].rect, &fb_rect) &&
1601           is_subrect(render_area, &fb_rect);
1602 }
1603 
1604 static void
1605 handle_deferred_clear_attachments(struct v3dv_cmd_buffer *cmd_buffer,
1606                                   uint32_t attachmentCount,
1607                                   const VkClearAttachment *pAttachments,
1608                                   uint32_t rectCount,
1609                                   const VkClearRect *pRects)
1610 {
1611    /* Finish the current job */
1612    v3dv_cmd_buffer_finish_job(cmd_buffer);
1613 
1614    /* Add a deferred clear attachments job right after it that we will
1615     * process when this secondary command buffer is executed inside a primary.
1616     */
1617    struct v3dv_job *job =
1618       v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
1619                                      V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
1620                                      cmd_buffer,
1621                                      cmd_buffer->state.subpass_idx);
1622    v3dv_return_if_oom(cmd_buffer, NULL);
1623 
1624    job->cpu.clear_attachments.rects =
1625       vk_alloc(&cmd_buffer->device->alloc,
1626                sizeof(VkClearRect) * rectCount, 8,
1627                VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1628    if (!job->cpu.clear_attachments.rects) {
1629       v3dv_flag_oom(cmd_buffer, NULL);
1630       return;
1631    }
1632 
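   /* The attachments fit in the job's fixed-size attachment array, while the
    * rects, whose count is not bounded, use the heap allocation above.
    */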
1633    job->cpu.clear_attachments.attachment_count = attachmentCount;
1634    memcpy(job->cpu.clear_attachments.attachments, pAttachments,
1635           sizeof(VkClearAttachment) * attachmentCount);
1636 
1637    job->cpu.clear_attachments.rect_count = rectCount;
1638    memcpy(job->cpu.clear_attachments.rects, pRects,
1639           sizeof(VkClearRect) * rectCount);
1640 
1641    list_addtail(&job->list_link, &cmd_buffer->jobs);
1642 
1643    /* Resume the subpass so we can continue recording commands */
1644    v3dv_cmd_buffer_subpass_resume(cmd_buffer,
1645                                   cmd_buffer->state.subpass_idx);
1646 }
1647 
1648 static bool
1649 all_clear_rects_in_base_layer(uint32_t rect_count, const VkClearRect *rects)
1650 {
1651    for (uint32_t i = 0; i < rect_count; i++) {
1652       if (rects[i].baseArrayLayer != 0 || rects[i].layerCount != 1)
1653          return false;
1654    }
1655    return true;
1656 }
1657 
1658 void
1659 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1660                          uint32_t attachmentCount,
1661                          const VkClearAttachment *pAttachments,
1662                          uint32_t rectCount,
1663                          const VkClearRect *pRects)
1664 {
1665    V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1666 
1667    /* We can only clear attachments in the current subpass */
1668    assert(attachmentCount <= 5); /* 4 color + D/S */
1669 
1670    /* Clear attachments may clear multiple layers of the framebuffer, which
1671     * currently requires that we emit multiple jobs (one per layer) and
1672     * therefore requires that we have the framebuffer information available
1673     * to select the destination layers.
1674     *
1675     * For secondary command buffers the framebuffer state may not be available
1676     * until they are executed inside a primary command buffer, so in that case
1677     * we need to defer recording of the command until that moment.
1678     *
1679     * FIXME: once we add support for geometry shaders in the driver we could
1680     * avoid emitting a job per layer to implement this by always using the clear
1681     * rect path below with a passthrough geometry shader to select the layer to
1682     * clear. If we did that we would not need to special case secondary command
1683     * buffers here and we could ensure that any secondary command buffer in a
1684     * render pass only has one job with a partial CL, which would simplify things
1685     * quite a bit.
1686     */
1687    if (!cmd_buffer->state.framebuffer) {
1688       assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
1689       handle_deferred_clear_attachments(cmd_buffer,
1690                                         attachmentCount, pAttachments,
1691                                         rectCount, pRects);
1692       return;
1693    }
1694 
1695    assert(cmd_buffer->state.framebuffer);
1696 
1697    struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1698 
1699    assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1700    struct v3dv_subpass *subpass =
1701       &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1702 
1703    /* First we try to handle this by emitting a clear rect inside the
1704     * current job for this subpass. This should be optimal but this method
1705     * cannot handle clearing layers other than the base layer, since we don't
1706     * support any form of layered rendering yet.
1707     */
1708    if (all_clear_rects_in_base_layer(rectCount, pRects)) {
1709       for (uint32_t i = 0; i < attachmentCount; i++) {
1710          if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1711             emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1712                                            pAttachments[i].colorAttachment,
1713                                            &pAttachments[i].clearValue.color,
1714                                            rectCount, pRects);
1715          } else {
1716             emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1717                                         pAttachments[i].aspectMask,
1718                                         &pAttachments[i].clearValue.depthStencil,
1719                                         rectCount, pRects);
1720          }
1721       }
1722       return;
1723    }
1724 
1725    perf_debug("Falling back to slow path for vkCmdClearAttachments due to "
1726               "clearing layers other than the base array layer.\n");
1727 
1728    /* If we can't handle this as a draw call inside the current job then we
1729     * will have to spawn jobs for the clears, which will be slow. In that case,
1730     * try to use the TLB to clear if possible.
1731     */
1732    if (can_use_tlb_clear(cmd_buffer, rectCount, pRects)) {
1733       emit_tlb_clear(cmd_buffer, attachmentCount, pAttachments,
1734                      pRects[0].baseArrayLayer, pRects[0].layerCount);
1735       return;
1736    }
1737 
1738    /* Otherwise, fall back to drawing rects with the clear value using a
1739     * separate job. This is the slowest path.
1740     */
1741    for (uint32_t i = 0; i < attachmentCount; i++) {
1742       uint32_t attachment_idx = VK_ATTACHMENT_UNUSED;
1743 
1744       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1745          uint32_t rt_idx = pAttachments[i].colorAttachment;
1746          attachment_idx = subpass->color_attachments[rt_idx].attachment;
1747       } else if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
1748                                                VK_IMAGE_ASPECT_STENCIL_BIT)) {
1749          attachment_idx = subpass->ds_attachment.attachment;
1750       }
1751 
1752       if (attachment_idx == VK_ATTACHMENT_UNUSED)
1753          continue;
1754 
1755       if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1756          const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1757                                      VK_COLOR_COMPONENT_G_BIT |
1758                                      VK_COLOR_COMPONENT_B_BIT |
1759                                      VK_COLOR_COMPONENT_A_BIT;
1760          const uint32_t samples =
1761             cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1762          const VkFormat format =
1763             cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1764          for (uint32_t j = 0; j < rectCount; j++) {
1765             emit_color_clear_rect(cmd_buffer,
1766                                   attachment_idx,
1767                                   format,
1768                                   samples,
1769                                   components,
1770                                   pAttachments[i].clearValue.color,
1771                                   &pRects[j]);
1772          }
1773       } else {
1774          for (uint32_t j = 0; j < rectCount; j++) {
1775             emit_ds_clear_rect(cmd_buffer,
1776                                pAttachments[i].aspectMask,
1777                                attachment_idx,
1778                                pAttachments[i].clearValue.depthStencil,
1779                                &pRects[j]);
1780          }
1781       }
1782    }
1783 }
1784