1 /*
2 * Copyright © 2020 Raspberry Pi
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25
26 #include "broadcom/cle/v3dx_pack.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "vk_format_info.h"
29 #include "util/u_pack_color.h"
30
31 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)32 destroy_color_clear_pipeline(VkDevice _device,
33 uint64_t pipeline,
34 VkAllocationCallbacks *alloc)
35 {
36 struct v3dv_meta_color_clear_pipeline *p =
37 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
38 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
39 if (p->cached)
40 v3dv_DestroyRenderPass(_device, p->pass, alloc);
41 vk_free(alloc, p);
42 }
43
44 static void
destroy_depth_clear_pipeline(VkDevice _device,struct v3dv_meta_depth_clear_pipeline * p,VkAllocationCallbacks * alloc)45 destroy_depth_clear_pipeline(VkDevice _device,
46 struct v3dv_meta_depth_clear_pipeline *p,
47 VkAllocationCallbacks *alloc)
48 {
49 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
50 vk_free(alloc, p);
51 }
52
53 void
v3dv_meta_clear_init(struct v3dv_device * device)54 v3dv_meta_clear_init(struct v3dv_device *device)
55 {
56 device->meta.color_clear.cache =
57 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
58
59 device->meta.depth_clear.cache =
60 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
61 }
62
63 void
v3dv_meta_clear_finish(struct v3dv_device * device)64 v3dv_meta_clear_finish(struct v3dv_device *device)
65 {
66 VkDevice _device = v3dv_device_to_handle(device);
67
68 hash_table_foreach(device->meta.color_clear.cache, entry) {
69 struct v3dv_meta_color_clear_pipeline *item = entry->data;
70 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->alloc);
71 }
72 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
73
74 if (device->meta.color_clear.playout) {
75 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.playout,
76 &device->alloc);
77 }
78
79 hash_table_foreach(device->meta.depth_clear.cache, entry) {
80 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
81 destroy_depth_clear_pipeline(_device, item, &device->alloc);
82 }
83 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
84
85 if (device->meta.depth_clear.playout) {
86 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.playout,
87 &device->alloc);
88 }
89 }
90
91 static nir_ssa_def *
gen_rect_vertices(nir_builder * b)92 gen_rect_vertices(nir_builder *b)
93 {
94 nir_intrinsic_instr *vertex_id =
95 nir_intrinsic_instr_create(b->shader,
96 nir_intrinsic_load_vertex_id);
97 nir_ssa_dest_init(&vertex_id->instr, &vertex_id->dest, 1, 32, "vertexid");
98 nir_builder_instr_insert(b, &vertex_id->instr);
99
100
101 /* vertex 0: -1.0, -1.0
102 * vertex 1: -1.0, 1.0
103 * vertex 2: 1.0, -1.0
104 * vertex 3: 1.0, 1.0
105 *
106 * so:
107 *
108 * channel 0 is vertex_id < 2 ? -1.0 : 1.0
109 * channel 1 is vertex id & 1 ? 1.0 : -1.0
110 */
111
112 nir_ssa_def *one = nir_imm_int(b, 1);
113 nir_ssa_def *c0cmp = nir_ilt(b, &vertex_id->dest.ssa, nir_imm_int(b, 2));
114 nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, &vertex_id->dest.ssa, one), one);
115
116 nir_ssa_def *comp[4];
117 comp[0] = nir_bcsel(b, c0cmp,
118 nir_imm_float(b, -1.0f),
119 nir_imm_float(b, 1.0f));
120
121 comp[1] = nir_bcsel(b, c1cmp,
122 nir_imm_float(b, 1.0f),
123 nir_imm_float(b, -1.0f));
124 comp[2] = nir_imm_float(b, 0.0f);
125 comp[3] = nir_imm_float(b, 1.0f);
126 return nir_vec(b, comp, 4);
127 }
128
129 static nir_shader *
get_clear_rect_vs()130 get_clear_rect_vs()
131 {
132 nir_builder b;
133 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
134 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_VERTEX, options);
135 b.shader->info.name = ralloc_strdup(b.shader, "meta clear vs");
136
137 const struct glsl_type *vec4 = glsl_vec4_type();
138 nir_variable *vs_out_pos =
139 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
140 vs_out_pos->data.location = VARYING_SLOT_POS;
141
142 nir_ssa_def *pos = gen_rect_vertices(&b);
143 nir_store_var(&b, vs_out_pos, pos, 0xf);
144
145 return b.shader;
146 }
147
148 static nir_shader *
get_color_clear_rect_fs(uint32_t rt_idx,VkFormat format)149 get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)
150 {
151 nir_builder b;
152 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
153 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
154 b.shader->info.name = ralloc_strdup(b.shader, "meta clear fs");
155
156 enum pipe_format pformat = vk_format_to_pipe_format(format);
157 const struct glsl_type *fs_out_type =
158 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
159
160 nir_variable *fs_out_color =
161 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
162 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
163
164 nir_intrinsic_instr *color_load =
165 nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
166 nir_intrinsic_set_base(color_load, 0);
167 nir_intrinsic_set_range(color_load, 16);
168 color_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
169 color_load->num_components = 4;
170 nir_ssa_dest_init(&color_load->instr, &color_load->dest, 4, 32, "clear color");
171 nir_builder_instr_insert(&b, &color_load->instr);
172
173 nir_store_var(&b, fs_out_color, &color_load->dest.ssa, 0xf);
174
175 return b.shader;
176 }
177
178 static nir_shader *
get_depth_clear_rect_fs()179 get_depth_clear_rect_fs()
180 {
181 nir_builder b;
182 const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
183 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_FRAGMENT, options);
184 b.shader->info.name = ralloc_strdup(b.shader, "meta depth clear fs");
185
186 nir_variable *fs_out_depth =
187 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
188 "out_depth");
189 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
190
191 nir_intrinsic_instr *depth_load =
192 nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
193 nir_intrinsic_set_base(depth_load, 0);
194 nir_intrinsic_set_range(depth_load, 4);
195 depth_load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
196 depth_load->num_components = 1;
197 nir_ssa_dest_init(&depth_load->instr, &depth_load->dest, 1, 32,
198 "clear depth value");
199 nir_builder_instr_insert(&b, &depth_load->instr);
200
201 nir_store_var(&b, fs_out_depth, &depth_load->dest.ssa, 0x1);
202
203 return b.shader;
204 }
205
206 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)207 create_color_clear_pipeline_layout(struct v3dv_device *device,
208 VkPipelineLayout *pipeline_layout)
209 {
210 VkPipelineLayoutCreateInfo info = {
211 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
212 .setLayoutCount = 0,
213 .pushConstantRangeCount = 1,
214 .pPushConstantRanges =
215 &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
216 };
217
218 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
219 &info, &device->alloc, pipeline_layout);
220 }
221
222 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)223 create_depth_clear_pipeline_layout(struct v3dv_device *device,
224 VkPipelineLayout *pipeline_layout)
225 {
226 VkPipelineLayoutCreateInfo info = {
227 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
228 .setLayoutCount = 0,
229 .pushConstantRangeCount = 1,
230 .pPushConstantRanges =
231 &(VkPushConstantRange) { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
232 };
233
234 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
235 &info, &device->alloc, pipeline_layout);
236 }
237
238 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)239 create_pipeline(struct v3dv_device *device,
240 struct v3dv_render_pass *pass,
241 uint32_t subpass_idx,
242 uint32_t samples,
243 struct nir_shader *vs_nir,
244 struct nir_shader *fs_nir,
245 const VkPipelineVertexInputStateCreateInfo *vi_state,
246 const VkPipelineDepthStencilStateCreateInfo *ds_state,
247 const VkPipelineColorBlendStateCreateInfo *cb_state,
248 const VkPipelineLayout layout,
249 VkPipeline *pipeline)
250 {
251 struct v3dv_shader_module vs_m;
252 struct v3dv_shader_module fs_m;
253
254 v3dv_shader_module_internal_init(&vs_m, vs_nir);
255 v3dv_shader_module_internal_init(&fs_m, fs_nir);
256
257 VkPipelineShaderStageCreateInfo stages[2] = {
258 {
259 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
260 .stage = VK_SHADER_STAGE_VERTEX_BIT,
261 .module = v3dv_shader_module_to_handle(&vs_m),
262 .pName = "main",
263 },
264 {
265 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
266 .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
267 .module = v3dv_shader_module_to_handle(&fs_m),
268 .pName = "main",
269 },
270 };
271
272 VkGraphicsPipelineCreateInfo info = {
273 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
274
275 .stageCount = fs_nir ? 2 : 1,
276 .pStages = stages,
277
278 .pVertexInputState = vi_state,
279
280 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
281 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
282 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
283 .primitiveRestartEnable = false,
284 },
285
286 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
287 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
288 .viewportCount = 1,
289 .scissorCount = 1,
290 },
291
292 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
293 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
294 .rasterizerDiscardEnable = false,
295 .polygonMode = VK_POLYGON_MODE_FILL,
296 .cullMode = VK_CULL_MODE_NONE,
297 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
298 .depthBiasEnable = false,
299 },
300
301 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
302 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
303 .rasterizationSamples = samples,
304 .sampleShadingEnable = false,
305 .pSampleMask = NULL,
306 .alphaToCoverageEnable = false,
307 .alphaToOneEnable = false,
308 },
309
310 .pDepthStencilState = ds_state,
311
312 .pColorBlendState = cb_state,
313
314 /* The meta clear pipeline declares all state as dynamic.
315 * As a consequence, vkCmdBindPipeline writes no dynamic state
316 * to the cmd buffer. Therefore, at the end of the meta clear,
317 * we need only restore dynamic state that was vkCmdSet.
318 */
319 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
320 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
321 .dynamicStateCount = 6,
322 .pDynamicStates = (VkDynamicState[]) {
323 VK_DYNAMIC_STATE_VIEWPORT,
324 VK_DYNAMIC_STATE_SCISSOR,
325 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
326 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
327 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
328 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
329 VK_DYNAMIC_STATE_DEPTH_BIAS,
330 VK_DYNAMIC_STATE_LINE_WIDTH,
331 },
332 },
333
334 .flags = 0,
335 .layout = layout,
336 .renderPass = v3dv_render_pass_to_handle(pass),
337 .subpass = subpass_idx,
338 };
339
340 VkResult result =
341 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
342 VK_NULL_HANDLE,
343 1, &info,
344 &device->alloc,
345 pipeline);
346
347 ralloc_free(vs_nir);
348 ralloc_free(fs_nir);
349
350 return result;
351 }
352
353 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)354 create_color_clear_pipeline(struct v3dv_device *device,
355 struct v3dv_render_pass *pass,
356 uint32_t subpass_idx,
357 uint32_t rt_idx,
358 VkFormat format,
359 uint32_t samples,
360 uint32_t components,
361 VkPipelineLayout pipeline_layout,
362 VkPipeline *pipeline)
363 {
364 nir_shader *vs_nir = get_clear_rect_vs();
365 nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);
366
367 const VkPipelineVertexInputStateCreateInfo vi_state = {
368 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
369 .vertexBindingDescriptionCount = 0,
370 .vertexAttributeDescriptionCount = 0,
371 };
372
373 const VkPipelineDepthStencilStateCreateInfo ds_state = {
374 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
375 .depthTestEnable = false,
376 .depthWriteEnable = false,
377 .depthBoundsTestEnable = false,
378 .stencilTestEnable = false,
379 };
380
381 assert(subpass_idx < pass->subpass_count);
382 const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
383 assert(rt_idx < color_count);
384
385 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
386 for (uint32_t i = 0; i < color_count; i++) {
387 blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
388 .blendEnable = false,
389 .colorWriteMask = i == rt_idx ? components : 0,
390 };
391 }
392
393 const VkPipelineColorBlendStateCreateInfo cb_state = {
394 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
395 .logicOpEnable = false,
396 .attachmentCount = color_count,
397 .pAttachments = blend_att_state
398 };
399
400 return create_pipeline(device,
401 pass, subpass_idx,
402 samples,
403 vs_nir, fs_nir,
404 &vi_state,
405 &ds_state,
406 &cb_state,
407 pipeline_layout,
408 pipeline);
409 }
410
411 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)412 create_depth_clear_pipeline(struct v3dv_device *device,
413 VkImageAspectFlags aspects,
414 struct v3dv_render_pass *pass,
415 uint32_t subpass_idx,
416 uint32_t samples,
417 VkPipelineLayout pipeline_layout,
418 VkPipeline *pipeline)
419 {
420 const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
421 const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
422 assert(has_depth || has_stencil);
423
424 nir_shader *vs_nir = get_clear_rect_vs();
425 nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;
426
427 const VkPipelineVertexInputStateCreateInfo vi_state = {
428 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
429 .vertexBindingDescriptionCount = 0,
430 .vertexAttributeDescriptionCount = 0,
431 };
432
433 const VkPipelineDepthStencilStateCreateInfo ds_state = {
434 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
435 .depthTestEnable = has_depth,
436 .depthWriteEnable = has_depth,
437 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
438 .depthBoundsTestEnable = false,
439 .stencilTestEnable = has_stencil,
440 .front = {
441 .passOp = VK_STENCIL_OP_REPLACE,
442 .compareOp = VK_COMPARE_OP_ALWAYS,
443 /* compareMask, writeMask and reference are dynamic state */
444 },
445 .back = { 0 },
446 };
447
448 assert(subpass_idx < pass->subpass_count);
449 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
450 const VkPipelineColorBlendStateCreateInfo cb_state = {
451 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
452 .logicOpEnable = false,
453 .attachmentCount = pass->subpasses[subpass_idx].color_count,
454 .pAttachments = blend_att_state,
455 };
456
457 return create_pipeline(device,
458 pass, subpass_idx,
459 samples,
460 vs_nir, fs_nir,
461 &vi_state,
462 &ds_state,
463 &cb_state,
464 pipeline_layout,
465 pipeline);
466 }
467
468 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,uint32_t samples,VkRenderPass * pass)469 create_color_clear_render_pass(struct v3dv_device *device,
470 uint32_t rt_idx,
471 VkFormat format,
472 uint32_t samples,
473 VkRenderPass *pass)
474 {
475 VkAttachmentDescription att = {
476 .format = format,
477 .samples = samples,
478 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
479 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
480 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
481 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
482 };
483
484 VkAttachmentReference att_ref = {
485 .attachment = rt_idx,
486 .layout = VK_IMAGE_LAYOUT_GENERAL,
487 };
488
489 VkSubpassDescription subpass = {
490 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
491 .inputAttachmentCount = 0,
492 .colorAttachmentCount = 1,
493 .pColorAttachments = &att_ref,
494 .pResolveAttachments = NULL,
495 .pDepthStencilAttachment = NULL,
496 .preserveAttachmentCount = 0,
497 .pPreserveAttachments = NULL,
498 };
499
500 VkRenderPassCreateInfo info = {
501 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
502 .attachmentCount = 1,
503 .pAttachments = &att,
504 .subpassCount = 1,
505 .pSubpasses = &subpass,
506 .dependencyCount = 0,
507 .pDependencies = NULL,
508 };
509
510 return v3dv_CreateRenderPass(v3dv_device_to_handle(device),
511 &info, &device->alloc, pass);
512 }
513
514 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,uint32_t samples,uint32_t components)515 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
516 VkFormat format,
517 uint32_t samples,
518 uint32_t components)
519 {
520 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
521
522 uint64_t key = 0;
523 uint32_t bit_offset = 0;
524
525 key |= rt_idx;
526 bit_offset += 2;
527
528 key |= ((uint64_t) format) << bit_offset;
529 bit_offset += 32;
530
531 key |= ((uint64_t) samples) << bit_offset;
532 bit_offset += 4;
533
534 key |= ((uint64_t) components) << bit_offset;
535 bit_offset += 4;
536
537 assert(bit_offset <= 64);
538 return key;
539 }
540
541 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples)542 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
543 VkFormat format,
544 uint32_t samples)
545 {
546 uint64_t key = 0;
547 uint32_t bit_offset = 0;
548
549 key |= format;
550 bit_offset += 32;
551
552 key |= ((uint64_t) samples) << bit_offset;
553 bit_offset += 4;
554
555 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
556 key |= ((uint64_t) has_depth) << bit_offset;
557 bit_offset++;
558
559 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
560 key |= ((uint64_t) has_stencil) << bit_offset;
561 bit_offset++;;
562
563 assert(bit_offset <= 64);
564 return key;
565 }
566
567 static VkResult
get_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,uint32_t attachment_idx,VkFormat format,uint32_t samples,uint32_t components,struct v3dv_meta_color_clear_pipeline ** pipeline)568 get_color_clear_pipeline(struct v3dv_device *device,
569 struct v3dv_render_pass *pass,
570 uint32_t subpass_idx,
571 uint32_t rt_idx,
572 uint32_t attachment_idx,
573 VkFormat format,
574 uint32_t samples,
575 uint32_t components,
576 struct v3dv_meta_color_clear_pipeline **pipeline)
577 {
578 assert(vk_format_is_color(format));
579
580 VkResult result = VK_SUCCESS;
581
582 mtx_lock(&device->meta.mtx);
583 if (!device->meta.color_clear.playout) {
584 result =
585 create_color_clear_pipeline_layout(device,
586 &device->meta.color_clear.playout);
587 }
588 mtx_unlock(&device->meta.mtx);
589 if (result != VK_SUCCESS)
590 return result;
591
592 /* If pass != NULL it means that we are emitting the clear as a draw call
593 * in the current pass bound by the application. In that case, we can't
594 * cache the pipeline, since it will be referencing that pass and the
595 * application could be destroying it at any point. Hopefully, the perf
596 * impact is not too big since we still have the device pipeline cache
597 * around and we won't end up re-compiling the clear shader.
598 *
599 * FIXME: alternatively, we could refcount (or maybe clone) the render pass
600 * provided by the application and include it in the pipeline key setup
601 * to make caching safe in this scenario, however, based on tests with
602 * vkQuake3, the fact that we are not caching here doesn't seem to have
603 * any significant impact in performance, so it might not be worth it.
604 */
605 const bool can_cache_pipeline = (pass == NULL);
606
607 uint64_t key;
608 if (can_cache_pipeline) {
609 key =
610 get_color_clear_pipeline_cache_key(rt_idx, format, samples, components);
611 mtx_lock(&device->meta.mtx);
612 struct hash_entry *entry =
613 _mesa_hash_table_search(device->meta.color_clear.cache, &key);
614 if (entry) {
615 mtx_unlock(&device->meta.mtx);
616 *pipeline = entry->data;
617 return VK_SUCCESS;
618 }
619 }
620
621 *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
622 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
623
624 if (*pipeline == NULL) {
625 result = VK_ERROR_OUT_OF_HOST_MEMORY;
626 goto fail;
627 }
628
629 if (!pass) {
630 result = create_color_clear_render_pass(device,
631 rt_idx,
632 format,
633 samples,
634 &(*pipeline)->pass);
635 if (result != VK_SUCCESS)
636 goto fail;
637
638 pass = v3dv_render_pass_from_handle((*pipeline)->pass);
639 } else {
640 (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
641 }
642
643 result = create_color_clear_pipeline(device,
644 pass,
645 subpass_idx,
646 rt_idx,
647 format,
648 samples,
649 components,
650 device->meta.color_clear.playout,
651 &(*pipeline)->pipeline);
652 if (result != VK_SUCCESS)
653 goto fail;
654
655 if (can_cache_pipeline) {
656 (*pipeline)->key = key;
657 (*pipeline)->cached = true;
658 _mesa_hash_table_insert(device->meta.color_clear.cache,
659 &(*pipeline)->key, *pipeline);
660
661 mtx_unlock(&device->meta.mtx);
662 }
663
664 return VK_SUCCESS;
665
666 fail:
667 if (can_cache_pipeline)
668 mtx_unlock(&device->meta.mtx);
669
670 VkDevice _device = v3dv_device_to_handle(device);
671 if (*pipeline) {
672 if ((*pipeline)->cached)
673 v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->alloc);
674 if ((*pipeline)->pipeline)
675 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
676 vk_free(&device->alloc, *pipeline);
677 *pipeline = NULL;
678 }
679
680 return result;
681 }
682
683 static VkResult
get_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,struct v3dv_meta_depth_clear_pipeline ** pipeline)684 get_depth_clear_pipeline(struct v3dv_device *device,
685 VkImageAspectFlags aspects,
686 struct v3dv_render_pass *pass,
687 uint32_t subpass_idx,
688 uint32_t attachment_idx,
689 struct v3dv_meta_depth_clear_pipeline **pipeline)
690 {
691 assert(subpass_idx < pass->subpass_count);
692 assert(attachment_idx != VK_ATTACHMENT_UNUSED);
693 assert(attachment_idx < pass->attachment_count);
694
695 VkResult result = VK_SUCCESS;
696
697 mtx_lock(&device->meta.mtx);
698 if (!device->meta.depth_clear.playout) {
699 result =
700 create_depth_clear_pipeline_layout(device,
701 &device->meta.depth_clear.playout);
702 }
703 mtx_unlock(&device->meta.mtx);
704 if (result != VK_SUCCESS)
705 return result;
706
707 const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
708 const VkFormat format = pass->attachments[attachment_idx].desc.format;
709 assert(vk_format_is_depth_or_stencil(format));
710
711 const uint64_t key =
712 get_depth_clear_pipeline_cache_key(aspects, format, samples);
713 mtx_lock(&device->meta.mtx);
714 struct hash_entry *entry =
715 _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
716 if (entry) {
717 mtx_unlock(&device->meta.mtx);
718 *pipeline = entry->data;
719 return VK_SUCCESS;
720 }
721
722 *pipeline = vk_zalloc2(&device->alloc, NULL, sizeof(**pipeline), 8,
723 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
724
725 if (*pipeline == NULL) {
726 result = VK_ERROR_OUT_OF_HOST_MEMORY;
727 goto fail;
728 }
729
730 result = create_depth_clear_pipeline(device,
731 aspects,
732 pass,
733 subpass_idx,
734 samples,
735 device->meta.depth_clear.playout,
736 &(*pipeline)->pipeline);
737 if (result != VK_SUCCESS)
738 goto fail;
739
740 (*pipeline)->key = key;
741 _mesa_hash_table_insert(device->meta.depth_clear.cache,
742 &(*pipeline)->key, *pipeline);
743
744 mtx_unlock(&device->meta.mtx);
745 return VK_SUCCESS;
746
747 fail:
748 mtx_unlock(&device->meta.mtx);
749
750 VkDevice _device = v3dv_device_to_handle(device);
751 if (*pipeline) {
752 if ((*pipeline)->pipeline)
753 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->alloc);
754 vk_free(&device->alloc, *pipeline);
755 *pipeline = NULL;
756 }
757
758 return result;
759 }
760
761 static VkFormat
get_color_format_for_depth_stencil_format(VkFormat format)762 get_color_format_for_depth_stencil_format(VkFormat format)
763 {
764 /* For single depth/stencil aspect formats, we just choose a compatible
765 * 1 channel format, but for combined depth/stencil we want an RGBA format
766 * so we can specify the channels we want to write.
767 */
768 switch (format) {
769 case VK_FORMAT_D16_UNORM:
770 return VK_FORMAT_R16_UINT;
771 case VK_FORMAT_D32_SFLOAT:
772 return VK_FORMAT_R32_SFLOAT;
773 case VK_FORMAT_X8_D24_UNORM_PACK32:
774 case VK_FORMAT_D24_UNORM_S8_UINT:
775 return VK_FORMAT_R8G8B8A8_UINT;
776 default:
777 unreachable("Unsupported depth/stencil format");
778 };
779 }
780
781 /**
782 * Emits a scissored quad in the clear color, however, unlike the subpass
783 * versions, this creates its own framebuffer setup with a single color
784 * attachment, and therefore spawns new jobs, making it much slower than the
785 * subpass version.
786 *
787 * This path is only used when we have clears on layers other than the
788 * base layer in a framebuffer attachment, since we don't currently
789 * support any form of layered rendering that would allow us to implement
790 * this in the subpass version.
791 *
792 * Notice this can also handle depth/stencil formats by rendering to the
793 * depth/stencil target using a compatible color format.
794 */
795 static void
emit_color_clear_rect(struct v3dv_cmd_buffer * cmd_buffer,uint32_t attachment_idx,VkFormat rt_format,uint32_t rt_samples,uint32_t rt_components,VkClearColorValue clear_color,const VkClearRect * rect)796 emit_color_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
797 uint32_t attachment_idx,
798 VkFormat rt_format,
799 uint32_t rt_samples,
800 uint32_t rt_components,
801 VkClearColorValue clear_color,
802 const VkClearRect *rect)
803 {
804 assert(cmd_buffer->state.pass);
805 struct v3dv_device *device = cmd_buffer->device;
806 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
807
808 assert(attachment_idx != VK_ATTACHMENT_UNUSED &&
809 attachment_idx < pass->attachment_count);
810
811 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
812 VkResult result =
813 get_color_clear_pipeline(device,
814 NULL, 0, /* Not using current subpass */
815 0, attachment_idx,
816 rt_format, rt_samples, rt_components,
817 &pipeline);
818 if (result != VK_SUCCESS) {
819 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
820 v3dv_flag_oom(cmd_buffer, NULL);
821 return;
822 }
823 assert(pipeline && pipeline->pipeline && pipeline->pass);
824
825 /* Since we are not emitting the draw call in the current subpass we should
826 * be caching the clear pipeline and we don't have to take care of destorying
827 * it below.
828 */
829 assert(pipeline->cached);
830
831 /* Store command buffer state for the current subpass before we interrupt
832 * it to emit the color clear pass and then finish the job for the
833 * interrupted subpass.
834 */
835 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
836 v3dv_cmd_buffer_finish_job(cmd_buffer);
837
838 struct v3dv_framebuffer *subpass_fb =
839 v3dv_framebuffer_from_handle(cmd_buffer->state.meta.framebuffer);
840 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
841 VkDevice device_handle = v3dv_device_to_handle(cmd_buffer->device);
842
843 /* If we are clearing a depth/stencil attachment as a color attachment
844 * then we need to configure the framebuffer to the compatible color
845 * format.
846 */
847 const struct v3dv_image_view *att_iview =
848 subpass_fb->attachments[attachment_idx];
849 const bool is_depth_or_stencil =
850 vk_format_is_depth_or_stencil(att_iview->vk_format);
851
852 /* Emit the pass for each attachment layer, which creates a framebuffer
853 * for each selected layer of the attachment and then renders a scissored
854 * quad in the clear color.
855 */
856 uint32_t dirty_dynamic_state = 0;
857 for (uint32_t i = 0; i < rect->layerCount; i++) {
858 VkImageViewCreateInfo fb_layer_view_info = {
859 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
860 .image = v3dv_image_to_handle((struct v3dv_image *)att_iview->image),
861 .viewType =
862 v3dv_image_type_to_view_type(att_iview->image->type),
863 .format = is_depth_or_stencil ? rt_format : att_iview->vk_format,
864 .subresourceRange = {
865 .aspectMask = is_depth_or_stencil ? VK_IMAGE_ASPECT_COLOR_BIT :
866 att_iview->aspects,
867 .baseMipLevel = att_iview->base_level,
868 .levelCount = att_iview->max_level - att_iview->base_level + 1,
869 .baseArrayLayer = att_iview->first_layer + rect->baseArrayLayer + i,
870 .layerCount = 1,
871 },
872 };
873 VkImageView fb_attachment;
874 result = v3dv_CreateImageView(v3dv_device_to_handle(device),
875 &fb_layer_view_info,
876 &device->alloc, &fb_attachment);
877 if (result != VK_SUCCESS)
878 goto fail;
879
880 v3dv_cmd_buffer_add_private_obj(
881 cmd_buffer, (uintptr_t)fb_attachment,
882 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
883
884 VkFramebufferCreateInfo fb_info = {
885 .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
886 .renderPass = v3dv_render_pass_to_handle(pass),
887 .attachmentCount = 1,
888 .pAttachments = &fb_attachment,
889 .width = subpass_fb->width,
890 .height = subpass_fb->height,
891 .layers = 1,
892 };
893
894 VkFramebuffer fb;
895 result = v3dv_CreateFramebuffer(device_handle, &fb_info,
896 &cmd_buffer->device->alloc, &fb);
897 if (result != VK_SUCCESS)
898 goto fail;
899
900 v3dv_cmd_buffer_add_private_obj(
901 cmd_buffer, (uintptr_t)fb,
902 (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
903
904 VkRenderPassBeginInfo rp_info = {
905 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
906 .renderPass = pipeline->pass,
907 .framebuffer = fb,
908 .renderArea = {
909 .offset = { rect->rect.offset.x, rect->rect.offset.y },
910 .extent = { rect->rect.extent.width, rect->rect.extent.height } },
911 .clearValueCount = 0,
912 };
913
914 v3dv_CmdBeginRenderPass(cmd_buffer_handle, &rp_info,
915 VK_SUBPASS_CONTENTS_INLINE);
916
917 struct v3dv_job *job = cmd_buffer->state.job;
918 if (!job)
919 goto fail;
920 job->is_subpass_continue = true;
921
922 v3dv_CmdPushConstants(cmd_buffer_handle,
923 device->meta.color_clear.playout,
924 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
925 &clear_color);
926
927 v3dv_CmdBindPipeline(cmd_buffer_handle,
928 VK_PIPELINE_BIND_POINT_GRAPHICS,
929 pipeline->pipeline);
930
931 const VkViewport viewport = {
932 .x = rect->rect.offset.x,
933 .y = rect->rect.offset.y,
934 .width = rect->rect.extent.width,
935 .height = rect->rect.extent.height,
936 .minDepth = 0.0f,
937 .maxDepth = 1.0f
938 };
939 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
940 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rect->rect);
941
942 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
943
944 v3dv_CmdEndRenderPass(cmd_buffer_handle);
945 }
946
947 /* The clear pipeline sets viewport and scissor state, so we need
948 * to restore it
949 */
950 dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
951
952 fail:
953 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
954 }
955
956 static void
emit_ds_clear_rect(struct v3dv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,uint32_t attachment_idx,VkClearDepthStencilValue clear_ds,const VkClearRect * rect)957 emit_ds_clear_rect(struct v3dv_cmd_buffer *cmd_buffer,
958 VkImageAspectFlags aspects,
959 uint32_t attachment_idx,
960 VkClearDepthStencilValue clear_ds,
961 const VkClearRect *rect)
962 {
963 assert(cmd_buffer->state.pass);
964 assert(attachment_idx != VK_ATTACHMENT_UNUSED);
965 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
966
967 VkFormat format =
968 cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
969 assert ((aspects & ~vk_format_aspects(format)) == 0);
970
971 uint32_t samples =
972 cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
973
974 enum pipe_format pformat = vk_format_to_pipe_format(format);
975 VkClearColorValue clear_color;
976 uint32_t clear_zs =
977 util_pack_z_stencil(pformat, clear_ds.depth, clear_ds.stencil);
978
979 /* We implement depth/stencil clears by turning them into color clears
980 * with a compatible color format.
981 */
982 VkFormat color_format = get_color_format_for_depth_stencil_format(format);
983
984 uint32_t comps;
985 if (color_format == VK_FORMAT_R8G8B8A8_UINT) {
986 /* We are clearing a D24 format so we need to select the channels that we
987 * are being asked to clear to avoid clearing aspects that should be
988 * preserved. Also, the hardware uses the MSB channels to store the D24
989 * component, so we need to shift the components in the clear value to
990 * match that.
991 */
992 comps = 0;
993 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
994 comps |= VK_COLOR_COMPONENT_R_BIT;
995 clear_color.uint32[0] = clear_zs >> 24;
996 }
997 if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
998 comps |= VK_COLOR_COMPONENT_G_BIT |
999 VK_COLOR_COMPONENT_B_BIT |
1000 VK_COLOR_COMPONENT_A_BIT;
1001 clear_color.uint32[1] = (clear_zs >> 0) & 0xff;
1002 clear_color.uint32[2] = (clear_zs >> 8) & 0xff;
1003 clear_color.uint32[3] = (clear_zs >> 16) & 0xff;
1004 }
1005 } else {
1006 /* For anything else we use a single component format */
1007 comps = VK_COLOR_COMPONENT_R_BIT;
1008 clear_color.uint32[0] = clear_zs;
1009 }
1010
1011 emit_color_clear_rect(cmd_buffer, attachment_idx,
1012 color_format, samples, comps,
1013 clear_color, rect);
1014 }
1015
1016 /* Emits a scissored quad in the clear color.
1017 *
1018 * This path only works for clears to the base layer in the framebuffer, since
1019 * we don't currently support any form of layered rendering.
1020 */
1021 static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,uint32_t rt_idx,const VkClearColorValue * clear_color,uint32_t rect_count,const VkClearRect * rects)1022 emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1023 struct v3dv_render_pass *pass,
1024 struct v3dv_subpass *subpass,
1025 uint32_t rt_idx,
1026 const VkClearColorValue *clear_color,
1027 uint32_t rect_count,
1028 const VkClearRect *rects)
1029 {
1030 /* Skip if attachment is unused in the current subpass */
1031 assert(rt_idx < subpass->color_count);
1032 const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
1033 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1034 return;
1035
1036 /* Obtain a pipeline for this clear */
1037 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1038 const VkFormat format =
1039 cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
1040 const VkFormat samples =
1041 cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
1042 const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
1043 VK_COLOR_COMPONENT_G_BIT |
1044 VK_COLOR_COMPONENT_B_BIT |
1045 VK_COLOR_COMPONENT_A_BIT;
1046 struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
1047 VkResult result = get_color_clear_pipeline(cmd_buffer->device,
1048 pass,
1049 cmd_buffer->state.subpass_idx,
1050 rt_idx,
1051 attachment_idx,
1052 format,
1053 samples,
1054 components,
1055 &pipeline);
1056 if (result != VK_SUCCESS) {
1057 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1058 v3dv_flag_oom(cmd_buffer, NULL);
1059 return;
1060 }
1061 assert(pipeline && pipeline->pipeline);
1062
1063 /* Emit clear rects */
1064 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1065
1066 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1067 v3dv_CmdPushConstants(cmd_buffer_handle,
1068 cmd_buffer->device->meta.depth_clear.playout,
1069 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
1070 clear_color->float32);
1071
1072 v3dv_CmdBindPipeline(cmd_buffer_handle,
1073 VK_PIPELINE_BIND_POINT_GRAPHICS,
1074 pipeline->pipeline);
1075
1076 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1077
1078 for (uint32_t i = 0; i < rect_count; i++) {
1079 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1080 const VkViewport viewport = {
1081 .x = rects[i].rect.offset.x,
1082 .y = rects[i].rect.offset.y,
1083 .width = rects[i].rect.extent.width,
1084 .height = rects[i].rect.extent.height,
1085 .minDepth = 0.0f,
1086 .maxDepth = 1.0f
1087 };
1088 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1089 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1090 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1091 }
1092
1093 /* Subpass pipelines can't be cached because they include a reference to the
1094 * render pass currently bound by the application, which means that we need
1095 * to destroy them manually here.
1096 */
1097 assert(!pipeline->cached);
1098 v3dv_cmd_buffer_add_private_obj(
1099 cmd_buffer, (uintptr_t)pipeline,
1100 (v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);
1101
1102 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1103 }
1104
1105 /* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
1106 * and the stencil aspect by using stencil testing.
1107 *
1108 * This path only works for clears to the base layer in the framebuffer, since
1109 * we don't currently support any form of layered rendering.
1110 */
1111 static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_render_pass * pass,struct v3dv_subpass * subpass,VkImageAspectFlags aspects,const VkClearDepthStencilValue * clear_ds,uint32_t rect_count,const VkClearRect * rects)1112 emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
1113 struct v3dv_render_pass *pass,
1114 struct v3dv_subpass *subpass,
1115 VkImageAspectFlags aspects,
1116 const VkClearDepthStencilValue *clear_ds,
1117 uint32_t rect_count,
1118 const VkClearRect *rects)
1119 {
1120 /* Skip if attachment is unused in the current subpass */
1121 const uint32_t attachment_idx = subpass->ds_attachment.attachment;
1122 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1123 return;
1124
1125 /* Obtain a pipeline for this clear */
1126 assert(attachment_idx < cmd_buffer->state.pass->attachment_count);
1127 struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;
1128 VkResult result = get_depth_clear_pipeline(cmd_buffer->device,
1129 aspects,
1130 pass,
1131 cmd_buffer->state.subpass_idx,
1132 attachment_idx,
1133 &pipeline);
1134 if (result != VK_SUCCESS) {
1135 if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
1136 v3dv_flag_oom(cmd_buffer, NULL);
1137 return;
1138 }
1139 assert(pipeline && pipeline->pipeline);
1140
1141 /* Emit clear rects */
1142 v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);
1143
1144 VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
1145 v3dv_CmdPushConstants(cmd_buffer_handle,
1146 cmd_buffer->device->meta.depth_clear.playout,
1147 VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
1148 &clear_ds->depth);
1149
1150 v3dv_CmdBindPipeline(cmd_buffer_handle,
1151 VK_PIPELINE_BIND_POINT_GRAPHICS,
1152 pipeline->pipeline);
1153
1154 uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
1155 if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
1156 v3dv_CmdSetStencilReference(cmd_buffer_handle,
1157 VK_STENCIL_FACE_FRONT_AND_BACK,
1158 clear_ds->stencil);
1159 v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,
1160 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1161 v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,
1162 VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
1163 dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |
1164 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |
1165 VK_DYNAMIC_STATE_STENCIL_REFERENCE;
1166 }
1167
1168 for (uint32_t i = 0; i < rect_count; i++) {
1169 assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
1170 const VkViewport viewport = {
1171 .x = rects[i].rect.offset.x,
1172 .y = rects[i].rect.offset.y,
1173 .width = rects[i].rect.extent.width,
1174 .height = rects[i].rect.extent.height,
1175 .minDepth = 0.0f,
1176 .maxDepth = 1.0f
1177 };
1178 v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
1179 v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
1180 v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
1181 }
1182
1183 v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);
1184 }
1185
/* Emits a STORE_TILE_BUFFER_GENERAL packet into 'cl' that stores the given
 * tile buffer ('buffer') to the selected layer of the framebuffer attachment
 * at 'attachment_idx'.
 */
static void
emit_tlb_clear_store(struct v3dv_cmd_buffer *cmd_buffer,
                     struct v3dv_cl *cl,
                     uint32_t attachment_idx,
                     uint32_t layer,
                     uint32_t buffer)
{
   const struct v3dv_image_view *iview =
      cmd_buffer->state.framebuffer->attachments[attachment_idx];
   const struct v3dv_image *image = iview->image;
   const struct v3d_resource_slice *slice = &image->slices[iview->base_level];
   /* Byte offset of the destination layer within the image BO */
   uint32_t layer_offset = v3dv_layer_offset(image,
                                             iview->base_level,
                                             iview->first_layer + layer);

   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = buffer;
      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      store.output_image_format = iview->format->rt_type;
      store.r_b_swap = iview->swap_rb;
      store.memory_format = slice->tiling;

      /* This field is overloaded: padded height in UIF blocks for UIF
       * tilings, byte stride for raster; left at zero for other tilings.
       */
      if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
          slice->tiling == VC5_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == VC5_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
1224
1225 static void
emit_tlb_clear_stores(struct v3dv_cmd_buffer * cmd_buffer,struct v3dv_cl * cl,uint32_t attachment_count,const VkClearAttachment * attachments,uint32_t layer)1226 emit_tlb_clear_stores(struct v3dv_cmd_buffer *cmd_buffer,
1227 struct v3dv_cl *cl,
1228 uint32_t attachment_count,
1229 const VkClearAttachment *attachments,
1230 uint32_t layer)
1231 {
1232 struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
1233 const struct v3dv_subpass *subpass =
1234 &state->pass->subpasses[state->subpass_idx];
1235
1236 bool has_stores = false;
1237 for (uint32_t i = 0; i < attachment_count; i++) {
1238 uint32_t attachment_idx;
1239 uint32_t buffer;
1240 if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
1241 VK_IMAGE_ASPECT_STENCIL_BIT)) {
1242 attachment_idx = subpass->ds_attachment.attachment;
1243 buffer = v3dv_zs_buffer_from_aspect_bits(attachments[i].aspectMask);
1244 } else {
1245 uint32_t rt_idx = attachments[i].colorAttachment;
1246 attachment_idx = subpass->color_attachments[rt_idx].attachment;
1247 buffer = RENDER_TARGET_0 + rt_idx;
1248 }
1249
1250 if (attachment_idx == VK_ATTACHMENT_UNUSED)
1251 continue;
1252
1253 has_stores = true;
1254 emit_tlb_clear_store(cmd_buffer, cl, attachment_idx, layer, buffer);
1255 }
1256
1257 if (!has_stores) {
1258 cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
1259 store.buffer_to_store = NONE;
1260 }
1261 }
1262 }
1263
/* Emits the generic (per-tile) tile list for the TLB clear into the job's
 * indirect CL and branches to it from the RCL.
 */
static void
emit_tlb_clear_per_tile_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                            uint32_t attachment_count,
                            const VkClearAttachment *attachments,
                            uint32_t layer)
{
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(cmd_buffer, NULL);

   /* Remember where the tile list starts so the RCL branch below can
    * reference it.
    */
   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end); /* Nothing to load */

   cl_emit(cl, PRIM_LIST_FORMAT, fmt) {
      fmt.primitive_type = LIST_TRIANGLES;
   }

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store the cleared tile buffers to the attachments being cleared */
   emit_tlb_clear_stores(cmd_buffer, cl, attachment_count, attachments, layer);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}
1300
/* Emits the render control list commands that clear one framebuffer layer:
 * supertile configuration, the per-tile clear list, and the supertile
 * coordinates that cover the whole frame.
 */
static void
emit_tlb_clear_layer_rcl(struct v3dv_cmd_buffer *cmd_buffer,
                         uint32_t attachment_count,
                         const VkClearAttachment *attachments,
                         uint32_t layer)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_framebuffer *framebuffer = state->framebuffer;

   struct v3dv_job *job = cmd_buffer->state.job;
   struct v3dv_cl *rcl = &job->rcl;

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   /* Each layer gets its own 64-byte-per-tile region of the tile allocation
    * BO.
    */
   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Emit the clear and also the workaround for GFXH-1742 */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      /* Only the first iteration performs the actual clear; the second is
       * the dummy store required by the workaround.
       */
      if (i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);

   emit_tlb_clear_per_tile_rcl(cmd_buffer, attachment_count, attachments, layer);

   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;

   /* Emit coordinates for every supertile covering the framebuffer */
   const uint32_t max_render_x = framebuffer->width - 1;
   const uint32_t max_render_y = framebuffer->height - 1;
   const uint32_t max_x_supertile = max_render_x / supertile_w_in_pixels;
   const uint32_t max_y_supertile = max_render_y / supertile_h_in_pixels;

   for (int y = 0; y <= max_y_supertile; y++) {
      for (int x = 0; x <= max_x_supertile; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}
1374
/* Builds a complete render job that clears the given attachments for the
 * requested range of framebuffer layers using the TLB clear hardware path:
 * frame setup, per-render-target clear colors, Z/S clear values, and one
 * layer RCL per cleared layer.
 */
static void
emit_tlb_clear_job(struct v3dv_cmd_buffer *cmd_buffer,
                   uint32_t attachment_count,
                   const VkClearAttachment *attachments,
                   uint32_t base_layer,
                   uint32_t layer_count)
{
   const struct v3dv_cmd_buffer_state *state = &cmd_buffer->state;
   const struct v3dv_framebuffer *framebuffer = state->framebuffer;
   const struct v3dv_subpass *subpass =
      &state->pass->subpasses[state->subpass_idx];
   struct v3dv_job *job = cmd_buffer->state.job;
   assert(job);

   /* Check how many color attachments we have and also if we have a
    * depth/stencil attachment.
    */
   uint32_t color_attachment_count = 0;
   VkClearAttachment color_attachments[4];
   const VkClearDepthStencilValue *ds_clear_value = NULL;
   uint8_t internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
   for (uint32_t i = 0; i < attachment_count; i++) {
      if (attachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                       VK_IMAGE_ASPECT_STENCIL_BIT)) {
         assert(subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED);
         ds_clear_value = &attachments[i].clearValue.depthStencil;
         struct v3dv_render_pass_attachment *att =
            &state->pass->attachments[subpass->ds_attachment.attachment];
         internal_depth_type = v3dv_get_internal_depth_type(att->desc.format);
      } else if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         color_attachments[color_attachment_count++] = attachments[i];
      }
   }

   uint8_t internal_bpp;
   bool msaa;
   v3dv_framebuffer_compute_internal_bpp_msaa(framebuffer, subpass,
                                              &internal_bpp, &msaa);

   v3dv_job_start_frame(job,
                        framebuffer->width,
                        framebuffer->height,
                        framebuffer->layers,
                        color_attachment_count,
                        internal_bpp, msaa);

   /* Reserve space for the full RCL upfront so the cl_emit calls below
    * cannot fail mid-stream.
    */
   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    layer_count * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   v3dv_return_if_oom(cmd_buffer, NULL);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = framebuffer->width;
      config.image_height_pixels = framebuffer->height;
      config.number_of_render_targets = MAX2(color_attachment_count, 1);
      config.multisample_mode_4x = false; /* FIXME */
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = internal_depth_type;
   }

   /* Emit the hardware clear color for each color attachment being cleared */
   for (uint32_t i = 0; i < color_attachment_count; i++) {
      uint32_t rt_idx = color_attachments[i].colorAttachment;
      uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      const struct v3dv_render_pass_attachment *attachment =
         &state->pass->attachments[attachment_idx];

      uint32_t internal_type, internal_bpp, internal_size;
      const struct v3dv_format *format =
         v3dv_get_format(attachment->desc.format);
      v3dv_get_internal_type_bpp_for_output_format(format->rt_type,
                                                   &internal_type,
                                                   &internal_bpp);
      internal_size = 4 << internal_bpp;

      uint32_t clear_color[4] = { 0 };
      v3dv_get_hw_clear_color(&color_attachments[i].clearValue.color,
                              internal_type,
                              internal_size,
                              clear_color);

      struct v3dv_image_view *iview = framebuffer->attachments[attachment_idx];
      const struct v3dv_image *image = iview->image;
      const struct v3d_resource_slice *slice = &image->slices[iview->base_level];

      /* For UIF-tiled images with extra padding we must tell the hardware
       * the padded height so the clear covers the padding rows too.
       */
      uint32_t clear_pad = 0;
      if (slice->tiling == VC5_TILING_UIF_NO_XOR ||
          slice->tiling == VC5_TILING_UIF_XOR) {
         int uif_block_height = v3d_utile_height(image->cpp) * 2;

         uint32_t implicit_padded_height =
            align(framebuffer->height, uif_block_height) / uif_block_height;

         if (slice->padded_height_of_output_image_in_uif_blocks -
             implicit_padded_height >= 15) {
            clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
         }
      }

      /* The 128-bit clear value is split across up to three packets
       * depending on the render target's internal bpp.
       */
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = clear_color[0];
         clear.clear_color_next_24_bits = clear_color[1] & 0xffffff;
         clear.render_target_number = i;
      };

      if (iview->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((clear_color[1] >> 24) | (clear_color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((clear_color[2] >> 24) | ((clear_color[3] & 0xffff) << 8));
            clear.render_target_number = i;
         };
      }

      if (iview->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = clear_color[3] >> 16;
            clear.render_target_number = i;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      v3dv_render_pass_setup_render_target(cmd_buffer, 0,
                                           &rt.render_target_0_internal_bpp,
                                           &rt.render_target_0_internal_type,
                                           &rt.render_target_0_clamp);
      v3dv_render_pass_setup_render_target(cmd_buffer, 1,
                                           &rt.render_target_1_internal_bpp,
                                           &rt.render_target_1_internal_type,
                                           &rt.render_target_1_clamp);
      v3dv_render_pass_setup_render_target(cmd_buffer, 2,
                                           &rt.render_target_2_internal_bpp,
                                           &rt.render_target_2_internal_type,
                                           &rt.render_target_2_clamp);
      v3dv_render_pass_setup_render_target(cmd_buffer, 3,
                                           &rt.render_target_3_internal_bpp,
                                           &rt.render_target_3_internal_type,
                                           &rt.render_target_3_clamp);
   }

   /* Defaults (depth 1.0, stencil 0) are used when there is no Z/S clear */
   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = ds_clear_value ? ds_clear_value->depth : 1.0f;
      clear.stencil_clear_value = ds_clear_value ? ds_clear_value->stencil : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   /* One layer RCL per framebuffer layer being cleared */
   for (int layer = base_layer; layer < base_layer + layer_count; layer++) {
      emit_tlb_clear_layer_rcl(cmd_buffer,
                               attachment_count,
                               attachments,
                               layer);
   }

   cl_emit(rcl, END_OF_RENDERING, end);
}
1543
1544 static void
emit_tlb_clear(struct v3dv_cmd_buffer * cmd_buffer,uint32_t attachment_count,const VkClearAttachment * attachments,uint32_t base_layer,uint32_t layer_count)1545 emit_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
1546 uint32_t attachment_count,
1547 const VkClearAttachment *attachments,
1548 uint32_t base_layer,
1549 uint32_t layer_count)
1550 {
1551 struct v3dv_job *job =
1552 v3dv_cmd_buffer_start_job(cmd_buffer, cmd_buffer->state.subpass_idx,
1553 V3DV_JOB_TYPE_GPU_CL);
1554
1555 /* vkCmdClearAttachments runs inside a render pass */
1556 job->is_subpass_continue = true;
1557
1558 emit_tlb_clear_job(cmd_buffer,
1559 attachment_count,
1560 attachments,
1561 base_layer, layer_count);
1562
1563 v3dv_cmd_buffer_subpass_resume(cmd_buffer, cmd_buffer->state.subpass_idx);
1564 }
1565
1566 static bool
is_subrect(const VkRect2D * r0,const VkRect2D * r1)1567 is_subrect(const VkRect2D *r0, const VkRect2D *r1)
1568 {
1569 return r0->offset.x <= r1->offset.x &&
1570 r0->offset.y <= r1->offset.y &&
1571 r0->offset.x + r0->extent.width >= r1->offset.x + r1->extent.width &&
1572 r0->offset.y + r0->extent.height >= r1->offset.y + r1->extent.height;
1573 }
1574
1575 static bool
can_use_tlb_clear(struct v3dv_cmd_buffer * cmd_buffer,uint32_t rect_count,const VkClearRect * rects)1576 can_use_tlb_clear(struct v3dv_cmd_buffer *cmd_buffer,
1577 uint32_t rect_count,
1578 const VkClearRect* rects)
1579 {
1580 const struct v3dv_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
1581
1582 const VkRect2D *render_area = &cmd_buffer->state.render_area;
1583
1584 /* Check if we are clearing a single region covering the entire framebuffer
1585 * and that we are not constrained by the current render area.
1586 *
1587 * From the Vulkan 1.0 spec:
1588 *
1589 * "The vkCmdClearAttachments command is not affected by the bound
1590 * pipeline state."
1591 *
1592 * So we can ignore scissor and viewport state for this check.
1593 */
1594 const VkRect2D fb_rect = {
1595 { 0, 0 },
1596 { framebuffer->width, framebuffer->height }
1597 };
1598
1599 return rect_count == 1 &&
1600 is_subrect(&rects[0].rect, &fb_rect) &&
1601 is_subrect(render_area, &fb_rect);
1602 }
1603
/* Defers a vkCmdClearAttachments call recorded in a secondary command buffer
 * (where framebuffer state is not yet known) by recording a CPU job that
 * will replay the clear when the secondary is executed in a primary.
 */
static void
handle_deferred_clear_attachments(struct v3dv_cmd_buffer *cmd_buffer,
                                  uint32_t attachmentCount,
                                  const VkClearAttachment *pAttachments,
                                  uint32_t rectCount,
                                  const VkClearRect *pRects)
{
   /* Finish the current job */
   v3dv_cmd_buffer_finish_job(cmd_buffer);

   /* Add a deferred clear attachments job right after that we will process
    * when we execute this secondary command buffer into a primary.
    */
   struct v3dv_job *job =
      v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
                                     V3DV_JOB_TYPE_CPU_CLEAR_ATTACHMENTS,
                                     cmd_buffer,
                                     cmd_buffer->state.subpass_idx);
   v3dv_return_if_oom(cmd_buffer, NULL);

   /* The rects are heap-allocated and owned by the job (freed by the job's
    * destroy path); the attachments below are copied into a fixed-size
    * array embedded in the job.
    */
   job->cpu.clear_attachments.rects =
      vk_alloc(&cmd_buffer->device->alloc,
               sizeof(VkClearRect) * rectCount, 8,
               VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!job->cpu.clear_attachments.rects) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }

   /* NOTE(review): assumes attachmentCount fits the embedded attachments
    * array; vkCmdClearAttachments asserts attachmentCount <= 5 before
    * reaching this path — confirm the array size matches.
    */
   job->cpu.clear_attachments.attachment_count = attachmentCount;
   memcpy(job->cpu.clear_attachments.attachments, pAttachments,
          sizeof(VkClearAttachment) * attachmentCount);

   job->cpu.clear_attachments.rect_count = rectCount;
   memcpy(job->cpu.clear_attachments.rects, pRects,
          sizeof(VkClearRect) * rectCount);

   list_addtail(&job->list_link, &cmd_buffer->jobs);

   /* Resume the subpass so we can continue recording commands */
   v3dv_cmd_buffer_subpass_resume(cmd_buffer,
                                  cmd_buffer->state.subpass_idx);
}
1647
1648 static bool
all_clear_rects_in_base_layer(uint32_t rect_count,const VkClearRect * rects)1649 all_clear_rects_in_base_layer(uint32_t rect_count, const VkClearRect *rects)
1650 {
1651 for (uint32_t i = 0; i < rect_count; i++) {
1652 if (rects[i].baseArrayLayer != 0 || rects[i].layerCount != 1)
1653 return false;
1654 }
1655 return true;
1656 }
1657
/* vkCmdClearAttachments entry point.
 *
 * Dispatches among three strategies, fastest first:
 *  1. Clear rects drawn inside the current subpass job (base layer only).
 *  2. A dedicated TLB clear job (single full-framebuffer rect).
 *  3. Per-rect clear quads in separate jobs (slowest fallback).
 * Secondary command buffers without framebuffer state defer the clear.
 */
void
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
                         uint32_t attachmentCount,
                         const VkClearAttachment *pAttachments,
                         uint32_t rectCount,
                         const VkClearRect *pRects)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);

   /* We can only clear attachments in the current subpass */
   assert(attachmentCount <= 5); /* 4 color + D/S */

   /* Clear attachments may clear multiple layers of the framebuffer, which
    * currently requires that we emit multiple jobs (one per layer) and
    * therefore requires that we have the framebuffer information available
    * to select the destination layers.
    *
    * For secondary command buffers the framebuffer state may not be available
    * until they are executed inside a primary command buffer, so in that case
    * we need to defer recording of the command until that moment.
    *
    * FIXME: once we add support for geometry shaders in the driver we could
    * avoid emitting a job per layer to implement this by always using the clear
    * rect path below with a passthrough geometry shader to select the layer to
    * clear. If we did that we would not need to special case secondary command
    * buffers here and we could ensure that any secondary command buffer in a
    * render pass only has on job with a partial CL, which would simplify things
    * quite a bit.
    */
   if (!cmd_buffer->state.framebuffer) {
      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
      handle_deferred_clear_attachments(cmd_buffer,
                                        attachmentCount, pAttachments,
                                        rectCount, pRects);
      return;
   }

   assert(cmd_buffer->state.framebuffer);

   struct v3dv_render_pass *pass = cmd_buffer->state.pass;

   assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
   struct v3dv_subpass *subpass =
      &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];

   /* First we try to handle this by emitting a clear rect inside the
    * current job for this subpass. This should be optimal but this method
    * cannot handle clearing layers other than the base layer, since we don't
    * support any form of layered rendering yet.
    */
   if (all_clear_rects_in_base_layer(rectCount, pRects)) {
      for (uint32_t i = 0; i < attachmentCount; i++) {
         if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
            emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
                                           pAttachments[i].colorAttachment,
                                           &pAttachments[i].clearValue.color,
                                           rectCount, pRects);
         } else {
            emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
                                        pAttachments[i].aspectMask,
                                        &pAttachments[i].clearValue.depthStencil,
                                        rectCount, pRects);
         }
      }
      return;
   }

   perf_debug("Falling back to slow path for vkCmdClearAttachments due to "
              "clearing layers other than the base array layer.\n");

   /* If we can't handle this as a draw call inside the current job then we
    * will have to spawn jobs for the clears, which will be slow. In that case,
    * try to use the TLB to clear if possible.
    */
   if (can_use_tlb_clear(cmd_buffer, rectCount, pRects)) {
      emit_tlb_clear(cmd_buffer, attachmentCount, pAttachments,
                     pRects[0].baseArrayLayer, pRects[0].layerCount);
      return;
   }

   /* Otherwise, fall back to drawing rects with the clear value using a
    * separate job. This is the slowest path.
    */
   for (uint32_t i = 0; i < attachmentCount; i++) {
      /* Resolve the clear attachment to a render pass attachment index */
      uint32_t attachment_idx = VK_ATTACHMENT_UNUSED;

      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t rt_idx = pAttachments[i].colorAttachment;
         attachment_idx = subpass->color_attachments[rt_idx].attachment;
      } else if (pAttachments[i].aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                               VK_IMAGE_ASPECT_STENCIL_BIT)) {
         attachment_idx = subpass->ds_attachment.attachment;
      }

      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

      if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
                                     VK_COLOR_COMPONENT_G_BIT |
                                     VK_COLOR_COMPONENT_B_BIT |
                                     VK_COLOR_COMPONENT_A_BIT;
         const uint32_t samples =
            cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;
         const VkFormat format =
            cmd_buffer->state.pass->attachments[attachment_idx].desc.format;
         for (uint32_t j = 0; j < rectCount; j++) {
            emit_color_clear_rect(cmd_buffer,
                                  attachment_idx,
                                  format,
                                  samples,
                                  components,
                                  pAttachments[i].clearValue.color,
                                  &pRects[j]);
         }
      } else {
         for (uint32_t j = 0; j < rectCount; j++) {
            emit_ds_clear_rect(cmd_buffer,
                               pAttachments[i].aspectMask,
                               attachment_idx,
                               pAttachments[i].clearValue.depthStencil,
                               &pRects[j]);
         }
      }
   }
}
1784