1 /*
2 * Copyright © 2020 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "v3dv_meta_common.h"
26
27 #include "compiler/nir/nir_builder.h"
28 #include "util/u_pack_color.h"
29 #include "vk_common_entrypoints.h"
30
31 static void
get_hw_clear_color(struct v3dv_device * device,const VkClearColorValue * color,VkFormat fb_format,VkFormat image_format,uint32_t internal_type,uint32_t internal_bpp,uint32_t * hw_color)32 get_hw_clear_color(struct v3dv_device *device,
33 const VkClearColorValue *color,
34 VkFormat fb_format,
35 VkFormat image_format,
36 uint32_t internal_type,
37 uint32_t internal_bpp,
38 uint32_t *hw_color)
39 {
40 const uint32_t internal_size = 4 << internal_bpp;
41
42 /* If the image format doesn't match the framebuffer format, then we are
43 * trying to clear an unsupported tlb format using a compatible
44 * format for the framebuffer. In this case, we want to make sure that
45 * we pack the clear value according to the original format semantics,
46 * not the compatible format.
47 */
48 if (fb_format == image_format) {
49 v3d_X((&device->devinfo), get_hw_clear_color)(color, internal_type, internal_size,
50 hw_color);
51 } else {
52 union util_color uc;
53 enum pipe_format pipe_image_format =
54 vk_format_to_pipe_format(image_format);
55 util_pack_color(color->float32, pipe_image_format, &uc);
56 memcpy(hw_color, uc.ui, internal_size);
57 }
58 }
59
60 /* Returns true if the implementation is able to handle the case, false
61 * otherwise.
62 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;

   /* From vkCmdClearColorImage spec:
    * "image must not use any of the formats that require a sampler YCBCR
    * conversion"
    */
   assert(image->plane_count == 1);
   if (!v3dv_meta_can_use_tlb(image, 0, 0, &origin, NULL, &fb_format))
      return false;

   /* Translate the (possibly compatible) framebuffer format and the aspects
    * to clear into the TLB internal type/bpp used to set up the frame.
    */
   uint32_t internal_type, internal_bpp;
   v3d_X((&cmd_buffer->device->devinfo), get_internal_type_bpp_for_image_aspects)
      (fb_format, range->aspectMask,
       &internal_type, &internal_bpp);

   /* Pack the API clear value into its HW representation: color clears go
    * through get_hw_clear_color (which honors the original image format's
    * semantics), depth/stencil values are stored directly.
    */
   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
                         image->vk.format, internal_type, internal_bpp,
                         &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   uint32_t level_count = vk_image_subresource_level_count(&image->vk, range);
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->vk.image_type != VK_IMAGE_TYPE_3D) {
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer +
                  vk_image_subresource_layer_count(&image->vk, range);
   } else {
      min_layer = 0;
      max_layer = 0;
   }

   /* Emit one CL job per mip level, each clearing all requested layers. */
   for (uint32_t level = min_level; level < max_level; level++) {
      /* The depth of a 3D image shrinks with the mip level, so recompute
       * the layer bound per level.
       */
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->vk.extent.depth, level);

      uint32_t width = u_minify(image->vk.extent.width, level);
      uint32_t height = u_minify(image->vk.extent.height, level);

      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

      /* NOTE(review): a failed job allocation still reports the clear as
       * handled (true), presumably because the error is recorded on the
       * command buffer — confirm against v3dv_cmd_buffer_start_job.
       */
      if (!job)
         return true;

      v3dv_job_start_frame(job, width, height, max_layer,
                           false, true, 1, internal_bpp,
                           4 * v3d_internal_bpp_words(internal_bpp),
                           image->vk.samples > VK_SAMPLE_COUNT_1_BIT);

      struct v3dv_meta_framebuffer framebuffer;
      v3d_X((&job->device->devinfo), meta_framebuffer_init)(&framebuffer, fb_format,
                                                            internal_type,
                                                            &job->frame_tiling);

      v3d_X((&job->device->devinfo), job_emit_binning_flush)(job);

      /* If this triggers it is an application bug: the spec requires
       * that any aspects to clear are present in the image.
       */
      assert(range->aspectMask & image->vk.aspects);

      v3d_X((&job->device->devinfo), meta_emit_clear_image_rcl)
         (job, image, &framebuffer, &hw_clear_value,
          range->aspectMask, min_layer, max_layer, level);

      v3dv_cmd_buffer_finish_job(cmd_buffer);
   }

   return true;
}
155
156 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearColorValue * pColor,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)157 v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
158 VkImage _image,
159 VkImageLayout imageLayout,
160 const VkClearColorValue *pColor,
161 uint32_t rangeCount,
162 const VkImageSubresourceRange *pRanges)
163 {
164 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
165 V3DV_FROM_HANDLE(v3dv_image, image, _image);
166
167 const VkClearValue clear_value = {
168 .color = *pColor,
169 };
170
171 cmd_buffer->state.is_transfer = true;
172
173 for (uint32_t i = 0; i < rangeCount; i++) {
174 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
175 continue;
176 unreachable("Unsupported color clear.");
177 }
178
179 cmd_buffer->state.is_transfer = false;
180 }
181
182 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,VkImage _image,VkImageLayout imageLayout,const VkClearDepthStencilValue * pDepthStencil,uint32_t rangeCount,const VkImageSubresourceRange * pRanges)183 v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
184 VkImage _image,
185 VkImageLayout imageLayout,
186 const VkClearDepthStencilValue *pDepthStencil,
187 uint32_t rangeCount,
188 const VkImageSubresourceRange *pRanges)
189 {
190 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
191 V3DV_FROM_HANDLE(v3dv_image, image, _image);
192
193 const VkClearValue clear_value = {
194 .depthStencil = *pDepthStencil,
195 };
196
197 cmd_buffer->state.is_transfer = true;
198
199 for (uint32_t i = 0; i < rangeCount; i++) {
200 if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
201 continue;
202 unreachable("Unsupported depth/stencil clear.");
203 }
204
205 cmd_buffer->state.is_transfer = false;
206 }
207
208 static void
destroy_color_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)209 destroy_color_clear_pipeline(VkDevice _device,
210 uint64_t pipeline,
211 VkAllocationCallbacks *alloc)
212 {
213 struct v3dv_meta_color_clear_pipeline *p =
214 (struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;
215 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
216 if (p->cached)
217 v3dv_DestroyRenderPass(_device, p->pass, alloc);
218 vk_free(alloc, p);
219 }
220
221 static void
destroy_depth_clear_pipeline(VkDevice _device,uint64_t pipeline,VkAllocationCallbacks * alloc)222 destroy_depth_clear_pipeline(VkDevice _device,
223 uint64_t pipeline,
224 VkAllocationCallbacks *alloc)
225 {
226 struct v3dv_meta_depth_clear_pipeline *p =
227 (struct v3dv_meta_depth_clear_pipeline *)(uintptr_t)pipeline;
228 v3dv_DestroyPipeline(_device, p->pipeline, alloc);
229 vk_free(alloc, p);
230 }
231
232 static VkResult
create_color_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)233 create_color_clear_pipeline_layout(struct v3dv_device *device,
234 VkPipelineLayout *pipeline_layout)
235 {
236 /* FIXME: this is abusing a bit the API, since not all of our clear
237 * pipelines have a geometry shader. We could create 2 different pipeline
238 * layouts, but this works for us for now.
239 */
240 VkPushConstantRange ranges[2] = {
241 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },
242 { VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },
243 };
244
245 VkPipelineLayoutCreateInfo info = {
246 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
247 .setLayoutCount = 0,
248 .pushConstantRangeCount = 2,
249 .pPushConstantRanges = ranges,
250 };
251
252 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
253 &info, &device->vk.alloc, pipeline_layout);
254 }
255
256 static VkResult
create_depth_clear_pipeline_layout(struct v3dv_device * device,VkPipelineLayout * pipeline_layout)257 create_depth_clear_pipeline_layout(struct v3dv_device *device,
258 VkPipelineLayout *pipeline_layout)
259 {
260 /* FIXME: this is abusing a bit the API, since not all of our clear
261 * pipelines have a geometry shader. We could create 2 different pipeline
262 * layouts, but this works for us for now.
263 */
264 VkPushConstantRange ranges[2] = {
265 { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },
266 { VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },
267 };
268
269 VkPipelineLayoutCreateInfo info = {
270 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
271 .setLayoutCount = 0,
272 .pushConstantRangeCount = 2,
273 .pPushConstantRanges = ranges
274 };
275
276 return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
277 &info, &device->vk.alloc, pipeline_layout);
278 }
279
280 void
v3dv_meta_clear_init(struct v3dv_device * device)281 v3dv_meta_clear_init(struct v3dv_device *device)
282 {
283 if (device->instance->meta_cache_enabled) {
284 device->meta.color_clear.cache =
285 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
286
287 device->meta.depth_clear.cache =
288 _mesa_hash_table_create(NULL, u64_hash, u64_compare);
289 }
290
291 create_color_clear_pipeline_layout(device,
292 &device->meta.color_clear.p_layout);
293 create_depth_clear_pipeline_layout(device,
294 &device->meta.depth_clear.p_layout);
295 }
296
297 void
v3dv_meta_clear_finish(struct v3dv_device * device)298 v3dv_meta_clear_finish(struct v3dv_device *device)
299 {
300 VkDevice _device = v3dv_device_to_handle(device);
301
302 if (device->instance->meta_cache_enabled) {
303 hash_table_foreach(device->meta.color_clear.cache, entry) {
304 struct v3dv_meta_color_clear_pipeline *item = entry->data;
305 destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
306 }
307 _mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);
308
309 hash_table_foreach(device->meta.depth_clear.cache, entry) {
310 struct v3dv_meta_depth_clear_pipeline *item = entry->data;
311 destroy_depth_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);
312 }
313 _mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);
314 }
315
316 if (device->meta.color_clear.p_layout) {
317 v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,
318 &device->vk.alloc);
319 }
320
321 if (device->meta.depth_clear.p_layout) {
322 v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,
323 &device->vk.alloc);
324 }
325 }
326
327 static nir_shader *
get_clear_rect_vs(const nir_shader_compiler_options * options)328 get_clear_rect_vs(const nir_shader_compiler_options *options)
329 {
330 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
331 "meta clear vs");
332
333 const struct glsl_type *vec4 = glsl_vec4_type();
334 nir_variable *vs_out_pos =
335 nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
336 vs_out_pos->data.location = VARYING_SLOT_POS;
337
338 nir_def *pos = nir_gen_rect_vertices(&b, NULL, NULL);
339 nir_store_var(&b, vs_out_pos, pos, 0xf);
340
341 return b.shader;
342 }
343
static nir_shader *
get_clear_rect_gs(const nir_shader_compiler_options *options,
                  uint32_t push_constant_layer_base)
{
   /* Builds a pass-through geometry shader that forwards the incoming
    * triangle and writes gl_Layer from a push constant at byte offset
    * push_constant_layer_base, so a single draw can target one specific
    * framebuffer layer.
    */
   /* FIXME: this creates a geometry shader that takes the index of a single
    * layer to clear from push constants, so we need to emit a draw call for
    * each layer that we want to clear. We could actually do better and have it
    * take a range of layers and then emit one triangle per layer to clear,
    * however, if we were to do this we would need to be careful not to exceed
    * the maximum number of output vertices allowed in a geometry shader.
    */
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
                                                  "meta clear gs");
   nir_shader *nir = b.shader;
   nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
   nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
                               (1ull << VARYING_SLOT_LAYER);
   nir->info.gs.input_primitive = MESA_PRIM_TRIANGLES;
   nir->info.gs.output_primitive = MESA_PRIM_TRIANGLE_STRIP;
   nir->info.gs.vertices_in = 3;
   nir->info.gs.vertices_out = 3;
   nir->info.gs.invocations = 1;
   nir->info.gs.active_stream_mask = 0x1;

   /* in vec4 gl_Position[3] */
   nir_variable *gs_in_pos =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_array_type(glsl_vec4_type(), 3, 0),
                          "in_gl_Position");
   gs_in_pos->data.location = VARYING_SLOT_POS;

   /* out vec4 gl_Position */
   nir_variable *gs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
                          "out_gl_Position");
   gs_out_pos->data.location = VARYING_SLOT_POS;

   /* out float gl_Layer */
   nir_variable *gs_out_layer =
      nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
                          "out_gl_Layer");
   gs_out_layer->data.location = VARYING_SLOT_LAYER;

   /* Emit output triangle */
   for (uint32_t i = 0; i < 3; i++) {
      /* gl_Position from shader input */
      nir_deref_instr *in_pos_i =
         nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
      nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);

      /* gl_Layer from push constants; the same layer index is written for
       * every vertex of the triangle.
       */
      nir_def *layer =
         nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
                                .base = push_constant_layer_base, .range = 4);
      nir_store_var(&b, gs_out_layer, layer, 0x1);

      nir_emit_vertex(&b, 0);
   }

   nir_end_primitive(&b, 0);

   return nir;
}
407
408 static nir_shader *
get_color_clear_rect_fs(const nir_shader_compiler_options * options,uint32_t rt_idx,VkFormat format)409 get_color_clear_rect_fs(const nir_shader_compiler_options *options,
410 uint32_t rt_idx, VkFormat format)
411 {
412 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
413 "meta clear fs");
414
415 enum pipe_format pformat = vk_format_to_pipe_format(format);
416 const struct glsl_type *fs_out_type =
417 util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();
418
419 nir_variable *fs_out_color =
420 nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
421 fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;
422
423 nir_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);
424 nir_store_var(&b, fs_out_color, color_load, 0xf);
425
426 return b.shader;
427 }
428
429 static nir_shader *
get_depth_clear_rect_fs(const nir_shader_compiler_options * options)430 get_depth_clear_rect_fs(const nir_shader_compiler_options *options)
431 {
432 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
433 "meta depth clear fs");
434
435 nir_variable *fs_out_depth =
436 nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
437 "out_depth");
438 fs_out_depth->data.location = FRAG_RESULT_DEPTH;
439
440 nir_def *depth_load =
441 nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);
442
443 nir_store_var(&b, fs_out_depth, depth_load, 0x1);
444
445 return b.shader;
446 }
447
448 static VkResult
create_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,struct nir_shader * vs_nir,struct nir_shader * gs_nir,struct nir_shader * fs_nir,const VkPipelineVertexInputStateCreateInfo * vi_state,const VkPipelineDepthStencilStateCreateInfo * ds_state,const VkPipelineColorBlendStateCreateInfo * cb_state,const VkPipelineLayout layout,VkPipeline * pipeline)449 create_pipeline(struct v3dv_device *device,
450 struct v3dv_render_pass *pass,
451 uint32_t subpass_idx,
452 uint32_t samples,
453 struct nir_shader *vs_nir,
454 struct nir_shader *gs_nir,
455 struct nir_shader *fs_nir,
456 const VkPipelineVertexInputStateCreateInfo *vi_state,
457 const VkPipelineDepthStencilStateCreateInfo *ds_state,
458 const VkPipelineColorBlendStateCreateInfo *cb_state,
459 const VkPipelineLayout layout,
460 VkPipeline *pipeline)
461 {
462 VkPipelineShaderStageCreateInfo stages[3] = { 0 };
463 struct vk_shader_module vs_m = vk_shader_module_from_nir(vs_nir);
464 struct vk_shader_module gs_m;
465 struct vk_shader_module fs_m;
466
467 uint32_t stage_count = 0;
468 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
469 stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;
470 stages[stage_count].module = vk_shader_module_to_handle(&vs_m);
471 stages[stage_count].pName = "main";
472 stage_count++;
473
474 if (gs_nir) {
475 gs_m = vk_shader_module_from_nir(gs_nir);
476 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
477 stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;
478 stages[stage_count].module = vk_shader_module_to_handle(&gs_m);
479 stages[stage_count].pName = "main";
480 stage_count++;
481 }
482
483 if (fs_nir) {
484 fs_m = vk_shader_module_from_nir(fs_nir);
485 stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
486 stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;
487 stages[stage_count].module = vk_shader_module_to_handle(&fs_m);
488 stages[stage_count].pName = "main";
489 stage_count++;
490 }
491
492 VkGraphicsPipelineCreateInfo info = {
493 .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
494
495 .stageCount = stage_count,
496 .pStages = stages,
497
498 .pVertexInputState = vi_state,
499
500 .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
501 .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
502 .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
503 .primitiveRestartEnable = false,
504 },
505
506 .pViewportState = &(VkPipelineViewportStateCreateInfo) {
507 .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
508 .viewportCount = 1,
509 .scissorCount = 1,
510 },
511
512 .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
513 .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
514 .rasterizerDiscardEnable = false,
515 .polygonMode = VK_POLYGON_MODE_FILL,
516 .cullMode = VK_CULL_MODE_NONE,
517 .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
518 .depthBiasEnable = false,
519 },
520
521 .pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {
522 .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
523 .rasterizationSamples = samples,
524 .sampleShadingEnable = false,
525 .pSampleMask = NULL,
526 .alphaToCoverageEnable = false,
527 .alphaToOneEnable = false,
528 },
529
530 .pDepthStencilState = ds_state,
531
532 .pColorBlendState = cb_state,
533
534 /* The meta clear pipeline declares all state as dynamic.
535 * As a consequence, vkCmdBindPipeline writes no dynamic state
536 * to the cmd buffer. Therefore, at the end of the meta clear,
537 * we need only restore dynamic state that was vkCmdSet.
538 */
539 .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
540 .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
541 .dynamicStateCount = 6,
542 .pDynamicStates = (VkDynamicState[]) {
543 VK_DYNAMIC_STATE_VIEWPORT,
544 VK_DYNAMIC_STATE_SCISSOR,
545 VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
546 VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
547 VK_DYNAMIC_STATE_STENCIL_REFERENCE,
548 VK_DYNAMIC_STATE_BLEND_CONSTANTS,
549 VK_DYNAMIC_STATE_DEPTH_BIAS,
550 VK_DYNAMIC_STATE_LINE_WIDTH,
551 },
552 },
553
554 .flags = 0,
555 .layout = layout,
556 .renderPass = v3dv_render_pass_to_handle(pass),
557 .subpass = subpass_idx,
558 };
559
560 VkResult result =
561 v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
562 VK_NULL_HANDLE,
563 1, &info,
564 &device->vk.alloc,
565 pipeline);
566
567 ralloc_free(vs_nir);
568 ralloc_free(gs_nir);
569 ralloc_free(fs_nir);
570
571 return result;
572 }
573
574 static VkResult
create_color_clear_pipeline(struct v3dv_device * device,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)575 create_color_clear_pipeline(struct v3dv_device *device,
576 struct v3dv_render_pass *pass,
577 uint32_t subpass_idx,
578 uint32_t rt_idx,
579 VkFormat format,
580 VkSampleCountFlagBits samples,
581 uint32_t components,
582 bool is_layered,
583 VkPipelineLayout pipeline_layout,
584 VkPipeline *pipeline)
585 {
586 const nir_shader_compiler_options *options =
587 v3dv_pipeline_get_nir_options(&device->devinfo);
588
589 nir_shader *vs_nir = get_clear_rect_vs(options);
590 nir_shader *fs_nir = get_color_clear_rect_fs(options, rt_idx, format);
591 nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 16) : NULL;
592
593 const VkPipelineVertexInputStateCreateInfo vi_state = {
594 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
595 .vertexBindingDescriptionCount = 0,
596 .vertexAttributeDescriptionCount = 0,
597 };
598
599 const VkPipelineDepthStencilStateCreateInfo ds_state = {
600 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
601 .depthTestEnable = false,
602 .depthWriteEnable = false,
603 .depthBoundsTestEnable = false,
604 .stencilTestEnable = false,
605 };
606
607 assert(subpass_idx < pass->subpass_count);
608 const uint32_t color_count = pass->subpasses[subpass_idx].color_count;
609 assert(rt_idx < color_count);
610
611 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];
612 for (uint32_t i = 0; i < color_count; i++) {
613 blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {
614 .blendEnable = false,
615 .colorWriteMask = i == rt_idx ? components : 0,
616 };
617 }
618
619 const VkPipelineColorBlendStateCreateInfo cb_state = {
620 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
621 .logicOpEnable = false,
622 .attachmentCount = color_count,
623 .pAttachments = blend_att_state
624 };
625
626 return create_pipeline(device,
627 pass, subpass_idx,
628 samples,
629 vs_nir, gs_nir, fs_nir,
630 &vi_state,
631 &ds_state,
632 &cb_state,
633 pipeline_layout,
634 pipeline);
635 }
636
637 static VkResult
create_depth_clear_pipeline(struct v3dv_device * device,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t samples,bool is_layered,VkPipelineLayout pipeline_layout,VkPipeline * pipeline)638 create_depth_clear_pipeline(struct v3dv_device *device,
639 VkImageAspectFlags aspects,
640 struct v3dv_render_pass *pass,
641 uint32_t subpass_idx,
642 uint32_t samples,
643 bool is_layered,
644 VkPipelineLayout pipeline_layout,
645 VkPipeline *pipeline)
646 {
647 const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
648 const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
649 assert(has_depth || has_stencil);
650
651 const nir_shader_compiler_options *options =
652 v3dv_pipeline_get_nir_options(&device->devinfo);
653
654 nir_shader *vs_nir = get_clear_rect_vs(options);
655 nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs(options) : NULL;
656 nir_shader *gs_nir = is_layered ? get_clear_rect_gs(options, 4) : NULL;
657
658 const VkPipelineVertexInputStateCreateInfo vi_state = {
659 .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
660 .vertexBindingDescriptionCount = 0,
661 .vertexAttributeDescriptionCount = 0,
662 };
663
664 const VkPipelineDepthStencilStateCreateInfo ds_state = {
665 .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
666 .depthTestEnable = has_depth,
667 .depthWriteEnable = has_depth,
668 .depthCompareOp = VK_COMPARE_OP_ALWAYS,
669 .depthBoundsTestEnable = false,
670 .stencilTestEnable = has_stencil,
671 .front = {
672 .passOp = VK_STENCIL_OP_REPLACE,
673 .compareOp = VK_COMPARE_OP_ALWAYS,
674 /* compareMask, writeMask and reference are dynamic state */
675 },
676 .back = { 0 },
677 };
678
679 assert(subpass_idx < pass->subpass_count);
680 VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };
681 const VkPipelineColorBlendStateCreateInfo cb_state = {
682 .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
683 .logicOpEnable = false,
684 .attachmentCount = pass->subpasses[subpass_idx].color_count,
685 .pAttachments = blend_att_state,
686 };
687
688 return create_pipeline(device,
689 pass, subpass_idx,
690 samples,
691 vs_nir, gs_nir, fs_nir,
692 &vi_state,
693 &ds_state,
694 &cb_state,
695 pipeline_layout,
696 pipeline);
697 }
698
699 static VkResult
create_color_clear_render_pass(struct v3dv_device * device,uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,VkRenderPass * pass)700 create_color_clear_render_pass(struct v3dv_device *device,
701 uint32_t rt_idx,
702 VkFormat format,
703 VkSampleCountFlagBits samples,
704 VkRenderPass *pass)
705 {
706 VkAttachmentDescription2 att = {
707 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,
708 .format = format,
709 .samples = samples,
710 .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,
711 .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
712 .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
713 .finalLayout = VK_IMAGE_LAYOUT_GENERAL,
714 };
715
716 VkAttachmentReference2 att_ref = {
717 .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
718 .attachment = rt_idx,
719 .layout = VK_IMAGE_LAYOUT_GENERAL,
720 };
721
722 VkSubpassDescription2 subpass = {
723 .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
724 .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
725 .inputAttachmentCount = 0,
726 .colorAttachmentCount = 1,
727 .pColorAttachments = &att_ref,
728 .pResolveAttachments = NULL,
729 .pDepthStencilAttachment = NULL,
730 .preserveAttachmentCount = 0,
731 .pPreserveAttachments = NULL,
732 };
733
734 VkRenderPassCreateInfo2 info = {
735 .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
736 .attachmentCount = 1,
737 .pAttachments = &att,
738 .subpassCount = 1,
739 .pSubpasses = &subpass,
740 .dependencyCount = 0,
741 .pDependencies = NULL,
742 };
743
744 return v3dv_CreateRenderPass2(v3dv_device_to_handle(device),
745 &info, &device->vk.alloc, pass);
746 }
747
748 static inline uint64_t
get_color_clear_pipeline_cache_key(uint32_t rt_idx,VkFormat format,VkSampleCountFlagBits samples,uint32_t components,bool is_layered,bool has_multiview)749 get_color_clear_pipeline_cache_key(uint32_t rt_idx,
750 VkFormat format,
751 VkSampleCountFlagBits samples,
752 uint32_t components,
753 bool is_layered,
754 bool has_multiview)
755 {
756 assert(rt_idx < V3D_MAX_DRAW_BUFFERS);
757
758 uint64_t key = 0;
759 uint32_t bit_offset = 0;
760
761 key |= rt_idx;
762 bit_offset += 3;
763
764 key |= ((uint64_t) format) << bit_offset;
765 bit_offset += 32;
766
767 key |= ((uint64_t) samples) << bit_offset;
768 bit_offset += 4;
769
770 key |= ((uint64_t) components) << bit_offset;
771 bit_offset += 4;
772
773 key |= (is_layered ? 1ull : 0ull) << bit_offset;
774 bit_offset += 1;
775
776 key |= (has_multiview ? 1ull : 0ull) << bit_offset;
777 bit_offset += 1;
778
779 assert(bit_offset <= 64);
780 return key;
781 }
782
783 static inline uint64_t
get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,VkFormat format,uint32_t samples,bool is_layered,bool has_multiview)784 get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,
785 VkFormat format,
786 uint32_t samples,
787 bool is_layered,
788 bool has_multiview)
789 {
790 uint64_t key = 0;
791 uint32_t bit_offset = 0;
792
793 key |= format;
794 bit_offset += 32;
795
796 key |= ((uint64_t) samples) << bit_offset;
797 bit_offset += 4;
798
799 const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;
800 key |= ((uint64_t) has_depth) << bit_offset;
801 bit_offset++;
802
803 const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;
804 key |= ((uint64_t) has_stencil) << bit_offset;
805 bit_offset++;;
806
807 key |= (is_layered ? 1ull : 0ull) << bit_offset;
808 bit_offset += 1;
809
810 key |= (has_multiview ? 1ull : 0ull) << bit_offset;
811 bit_offset += 1;
812
813 assert(bit_offset <= 64);
814 return key;
815 }
816
static VkResult
get_color_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_render_pass *pass,
                         uint32_t subpass_idx,
                         uint32_t rt_idx,
                         uint32_t attachment_idx,
                         VkFormat format,
                         VkSampleCountFlagBits samples,
                         uint32_t components,
                         bool is_layered,
                         bool has_multiview,
                         struct v3dv_meta_color_clear_pipeline **pipeline)
{
   /* Returns in *pipeline a color clear pipeline matching the given
    * parameters, creating (and, when possible, caching) it on first use.
    * On failure, any partially created objects are destroyed and *pipeline
    * is set to NULL.
    */
   assert(vk_format_is_color(format));
   struct v3dv_device *device = cmd_buffer->device;

   VkResult result = VK_SUCCESS;

   /* If pass != NULL it means that we are emitting the clear as a draw call
    * in the current pass bound by the application. In that case, we can't
    * cache the pipeline, since it will be referencing that pass and the
    * application could be destroying it at any point. Hopefully, the perf
    * impact is not too big since we still have the device pipeline cache
    * around and we won't end up re-compiling the clear shader.
    *
    * FIXME: alternatively, we could refcount (or maybe clone) the render pass
    * provided by the application and include it in the pipeline key setup
    * to make caching safe in this scenario, however, based on tests with
    * vkQuake3, the fact that we are not caching here doesn't seem to have
    * any significant impact in performance, so it might not be worth it.
    */
   const bool can_cache_pipeline =
      (pass == NULL) && (device->instance->meta_cache_enabled);

   uint64_t key;
   if (can_cache_pipeline) {
      key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,
                                               components, is_layered,
                                               has_multiview);
      /* The meta mutex is held from the cache lookup until the newly
       * created pipeline is inserted (or creation fails), so concurrent
       * requests for the same key cannot race.
       */
      mtx_lock(&device->meta.mtx);
      struct hash_entry *entry =
         _mesa_hash_table_search(device->meta.color_clear.cache, &key);
      if (entry) {
         mtx_unlock(&device->meta.mtx);
         *pipeline = entry->data;
         return VK_SUCCESS;
      }
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL) {
      result = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   /* With no application render pass we create a private single-attachment
    * pass for the clear; otherwise the clear draw reuses the bound pass.
    */
   if (!pass) {
      result = create_color_clear_render_pass(device,
                                              rt_idx,
                                              format,
                                              samples,
                                              &(*pipeline)->pass);
      if (result != VK_SUCCESS)
         goto fail;

      pass = v3dv_render_pass_from_handle((*pipeline)->pass);
   } else {
      (*pipeline)->pass = v3dv_render_pass_to_handle(pass);
   }

   result = create_color_clear_pipeline(device,
                                        pass,
                                        subpass_idx,
                                        rt_idx,
                                        format,
                                        samples,
                                        components,
                                        is_layered,
                                        device->meta.color_clear.p_layout,
                                        &(*pipeline)->pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   if (can_cache_pipeline) {
      /* 'cached' also marks that the entry owns its private render pass
       * (see destroy_color_clear_pipeline).
       */
      (*pipeline)->key = key;
      (*pipeline)->cached = true;
      _mesa_hash_table_insert(device->meta.color_clear.cache,
                              &(*pipeline)->key, *pipeline);

      mtx_unlock(&device->meta.mtx);
   } else {
      /* Non-cached pipelines are destroyed together with the command buffer
       * that created them.
       */
      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)*pipeline,
         (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_color_clear_pipeline);
   }

   return VK_SUCCESS;

fail:
   if (can_cache_pipeline)
      mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      /* NOTE(review): 'cached' is never true on this path (it is only set
       * right before the success return), so a private render pass created
       * above is not destroyed here — verify whether this can leak on
       * failure.
       */
      if ((*pipeline)->cached)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return result;
}
932
933 static VkResult
get_depth_clear_pipeline(struct v3dv_cmd_buffer * cmd_buffer,VkImageAspectFlags aspects,struct v3dv_render_pass * pass,uint32_t subpass_idx,uint32_t attachment_idx,bool is_layered,bool has_multiview,struct v3dv_meta_depth_clear_pipeline ** pipeline)934 get_depth_clear_pipeline(struct v3dv_cmd_buffer *cmd_buffer,
935 VkImageAspectFlags aspects,
936 struct v3dv_render_pass *pass,
937 uint32_t subpass_idx,
938 uint32_t attachment_idx,
939 bool is_layered,
940 bool has_multiview,
941 struct v3dv_meta_depth_clear_pipeline **pipeline)
942 {
943 assert(subpass_idx < pass->subpass_count);
944 assert(attachment_idx != VK_ATTACHMENT_UNUSED);
945 assert(attachment_idx < pass->attachment_count);
946
947 VkResult result = VK_SUCCESS;
948 struct v3dv_device *device = cmd_buffer->device;
949
950 const uint32_t samples = pass->attachments[attachment_idx].desc.samples;
951 const VkFormat format = pass->attachments[attachment_idx].desc.format;
952 assert(vk_format_is_depth_or_stencil(format));
953
954 uint64_t key;
955 bool meta_cache_enabled = device->instance->meta_cache_enabled;
956
957 if (meta_cache_enabled) {
958 key = get_depth_clear_pipeline_cache_key(aspects, format, samples,
959 is_layered, has_multiview);
960 mtx_lock(&device->meta.mtx);
961 struct hash_entry *entry =
962 _mesa_hash_table_search(device->meta.depth_clear.cache, &key);
963 if (entry) {
964 mtx_unlock(&device->meta.mtx);
965 *pipeline = entry->data;
966 return VK_SUCCESS;
967 }
968 }
969
970 *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
971 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
972
973 if (*pipeline == NULL) {
974 result = VK_ERROR_OUT_OF_HOST_MEMORY;
975 goto fail;
976 }
977
978 result = create_depth_clear_pipeline(device,
979 aspects,
980 pass,
981 subpass_idx,
982 samples,
983 is_layered,
984 device->meta.depth_clear.p_layout,
985 &(*pipeline)->pipeline);
986 if (result != VK_SUCCESS)
987 goto fail;
988
989 if (meta_cache_enabled) {
990 (*pipeline)->key = key;
991 _mesa_hash_table_insert(device->meta.depth_clear.cache,
992 &(*pipeline)->key, *pipeline);
993 mtx_unlock(&device->meta.mtx);
994 } else {
995 v3dv_cmd_buffer_add_private_obj(
996 cmd_buffer, (uintptr_t)*pipeline,
997 (v3dv_cmd_buffer_private_obj_destroy_cb)destroy_depth_clear_pipeline);
998 }
999
1000 return VK_SUCCESS;
1001
1002 fail:
1003 if (device->instance->meta_cache_enabled)
1004 mtx_unlock(&device->meta.mtx);
1005
1006 VkDevice _device = v3dv_device_to_handle(device);
1007 if (*pipeline) {
1008 if ((*pipeline)->pipeline)
1009 v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
1010 vk_free(&device->vk.alloc, *pipeline);
1011 *pipeline = NULL;
1012 }
1013
1014 return result;
1015 }
1016
/* Emits a scissored quad in the clear color.
 *
 * Implements the color-attachment path of vkCmdClearAttachments for one
 * render target of the current subpass: binds a meta clear pipeline and
 * draws a 4-vertex quad per clear rect, using viewport+scissor to bound
 * the clear. For layered framebuffers a geometry shader redirects each
 * draw to the target layer, which is fed through push constants.
 *
 * 'all_rects_same_layers' is currently unused in this function.
 */
static void
emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_render_pass *pass,
                               struct v3dv_subpass *subpass,
                               uint32_t rt_idx,
                               const VkClearColorValue *clear_color,
                               bool is_layered,
                               bool all_rects_same_layers,
                               uint32_t rect_count,
                               const VkClearRect *rects)
{
   /* Skip if attachment is unused in the current subpass */
   assert(rt_idx < subpass->color_count);
   const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;
   if (attachment_idx == VK_ATTACHMENT_UNUSED)
      return;

   /* Obtain a pipeline for this clear */
   assert(attachment_idx < pass->attachment_count);
   const VkFormat format = pass->attachments[attachment_idx].desc.format;
   const VkSampleCountFlagBits samples =
      pass->attachments[attachment_idx].desc.samples;
   /* Clears always write all four color components */
   const uint32_t components = VK_COLOR_COMPONENT_R_BIT |
                               VK_COLOR_COMPONENT_G_BIT |
                               VK_COLOR_COMPONENT_B_BIT |
                               VK_COLOR_COMPONENT_A_BIT;

   struct v3dv_meta_color_clear_pipeline *pipeline = NULL;
   VkResult result = get_color_clear_pipeline(cmd_buffer,
                                              pass,
                                              cmd_buffer->state.subpass_idx,
                                              rt_idx,
                                              attachment_idx,
                                              format,
                                              samples,
                                              components,
                                              is_layered,
                                              pass->multiview_enabled,
                                              &pipeline);
   if (result != VK_SUCCESS) {
      /* Only OOM flags the command buffer; other errors silently skip the
       * clear (the pipeline couldn't be built).
       */
      if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
         v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }
   assert(pipeline && pipeline->pipeline);

   /* Emit clear rects */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   /* Clear color: 16 bytes (float32[4]) at push-constant offset 0 for the
    * fragment stage.
    *
    * NOTE(review): this pushes with the depth_clear pipeline layout even
    * though the bound pipeline was created with color_clear.p_layout above
    * — presumably both meta layouts declare identical push-constant ranges
    * and are therefore push-constant compatible; confirm against the meta
    * layout setup in device initialization.
    */
   v3dv_CmdPushConstants(cmd_buffer_handle,
                         cmd_buffer->device->meta.depth_clear.p_layout,
                         VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,
                         clear_color->float32);

   v3dv_CmdBindPipeline(cmd_buffer_handle,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   for (uint32_t i = 0; i < rect_count; i++) {
      /* Viewport/scissor restrict the quad to the clear rect */
      const VkViewport viewport = {
         .x = rects[i].rect.offset.x,
         .y = rects[i].rect.offset.y,
         .width = rects[i].rect.extent.width,
         .height = rects[i].rect.extent.height,
         .minDepth = 0.0f,
         .maxDepth = 1.0f
      };
      v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
      v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);

      if (is_layered) {
         /* One draw per layer; the layer index goes to the geometry shader
          * as a 4-byte push constant at offset 16 (right after the color).
          */
         for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
              layer_offset++) {
            uint32_t layer = rects[i].baseArrayLayer + layer_offset;
            v3dv_CmdPushConstants(cmd_buffer_handle,
                                  cmd_buffer->device->meta.depth_clear.p_layout,
                                  VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);
            v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
         }
      } else {
         assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
         v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
      }
   }

   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
}
1106
/* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth
 * and the stencil aspect by using stencil testing.
 *
 * Implements the depth/stencil path of vkCmdClearAttachments for the
 * current subpass: binds a meta D/S clear pipeline and draws a quad per
 * clear rect. The stencil clear value is applied via dynamic stencil
 * reference/masks rather than push constants. For layered framebuffers a
 * geometry shader redirects each draw to the target layer.
 *
 * 'all_rects_same_layers' is currently unused in this function.
 */
static void
emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,
                            struct v3dv_render_pass *pass,
                            struct v3dv_subpass *subpass,
                            VkImageAspectFlags aspects,
                            const VkClearDepthStencilValue *clear_ds,
                            bool is_layered,
                            bool all_rects_same_layers,
                            uint32_t rect_count,
                            const VkClearRect *rects)
{
   /* Skip if attachment is unused in the current subpass */
   const uint32_t attachment_idx = subpass->ds_attachment.attachment;
   if (attachment_idx == VK_ATTACHMENT_UNUSED)
      return;

   /* Obtain a pipeline for this clear */
   assert(attachment_idx < pass->attachment_count);
   struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;

   VkResult result = get_depth_clear_pipeline(cmd_buffer,
                                              aspects,
                                              pass,
                                              cmd_buffer->state.subpass_idx,
                                              attachment_idx,
                                              is_layered,
                                              pass->multiview_enabled,
                                              &pipeline);
   if (result != VK_SUCCESS) {
      /* Only OOM flags the command buffer; other errors silently skip the
       * clear (the pipeline couldn't be built).
       */
      if (result == VK_ERROR_OUT_OF_HOST_MEMORY)
         v3dv_flag_oom(cmd_buffer, NULL);
      return;
   }
   assert(pipeline && pipeline->pipeline);

   /* Emit clear rects */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);

   VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);
   /* Depth clear value: 4 bytes (float) at push-constant offset 0 for the
    * fragment stage (written to gl_FragDepth by the clear shader).
    */
   v3dv_CmdPushConstants(cmd_buffer_handle,
                         cmd_buffer->device->meta.depth_clear.p_layout,
                         VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,
                         &clear_ds->depth);

   v3dv_CmdBindPipeline(cmd_buffer_handle,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      /* The clear pipeline is built to replace stencil with the reference
       * value; set reference to the clear value and open both masks.
       */
      vk_common_CmdSetStencilReference(cmd_buffer_handle,
                                       VK_STENCIL_FACE_FRONT_AND_BACK,
                                       clear_ds->stencil);
      vk_common_CmdSetStencilWriteMask(cmd_buffer_handle,
                                       VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
      vk_common_CmdSetStencilCompareMask(cmd_buffer_handle,
                                         VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);
   }

   for (uint32_t i = 0; i < rect_count; i++) {
      /* Viewport/scissor restrict the quad to the clear rect */
      const VkViewport viewport = {
         .x = rects[i].rect.offset.x,
         .y = rects[i].rect.offset.y,
         .width = rects[i].rect.extent.width,
         .height = rects[i].rect.extent.height,
         .minDepth = 0.0f,
         .maxDepth = 1.0f
      };
      v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);
      v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);
      if (is_layered) {
         /* One draw per layer; the layer index goes to the geometry shader
          * as a 4-byte push constant at offset 4 (right after the depth).
          */
         for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;
              layer_offset++) {
            uint32_t layer = rects[i].baseArrayLayer + layer_offset;
            v3dv_CmdPushConstants(cmd_buffer_handle,
                                  cmd_buffer->device->meta.depth_clear.p_layout,
                                  VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);
            v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
         }
      } else {
         assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);
         v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);
      }
   }

   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, false);
}
1196
1197 static void
gather_layering_info(uint32_t rect_count,const VkClearRect * rects,bool * is_layered,bool * all_rects_same_layers)1198 gather_layering_info(uint32_t rect_count, const VkClearRect *rects,
1199 bool *is_layered, bool *all_rects_same_layers)
1200 {
1201 *all_rects_same_layers = true;
1202
1203 uint32_t min_layer = rects[0].baseArrayLayer;
1204 uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;
1205 for (uint32_t i = 1; i < rect_count; i++) {
1206 if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||
1207 rects[i].layerCount != rects[i - 1].layerCount) {
1208 *all_rects_same_layers = false;
1209 min_layer = MIN2(min_layer, rects[i].baseArrayLayer);
1210 max_layer = MAX2(max_layer, rects[i].baseArrayLayer +
1211 rects[i].layerCount - 1);
1212 }
1213 }
1214
1215 *is_layered = !(min_layer == 0 && max_layer == 0);
1216 }
1217
1218 VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,uint32_t attachmentCount,const VkClearAttachment * pAttachments,uint32_t rectCount,const VkClearRect * pRects)1219 v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,
1220 uint32_t attachmentCount,
1221 const VkClearAttachment *pAttachments,
1222 uint32_t rectCount,
1223 const VkClearRect *pRects)
1224 {
1225 V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
1226
1227 /* We can have at most max_color_RTs + 1 D/S attachments */
1228 assert(attachmentCount <=
1229 V3D_MAX_RENDER_TARGETS(cmd_buffer->device->devinfo.ver) + 1);
1230
1231 /* We can only clear attachments in the current subpass */
1232 struct v3dv_render_pass *pass = cmd_buffer->state.pass;
1233
1234 assert(cmd_buffer->state.subpass_idx < pass->subpass_count);
1235 struct v3dv_subpass *subpass =
1236 &cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];
1237
1238 /* Emit a clear rect inside the current job for this subpass. For layered
1239 * framebuffers, we use a geometry shader to redirect clears to the
1240 * appropriate layers.
1241 */
1242
1243 v3dv_cmd_buffer_pause_occlusion_query(cmd_buffer);
1244
1245 bool is_layered, all_rects_same_layers;
1246 gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);
1247 for (uint32_t i = 0; i < attachmentCount; i++) {
1248 if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
1249 emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,
1250 pAttachments[i].colorAttachment,
1251 &pAttachments[i].clearValue.color,
1252 is_layered, all_rects_same_layers,
1253 rectCount, pRects);
1254 } else {
1255 emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,
1256 pAttachments[i].aspectMask,
1257 &pAttachments[i].clearValue.depthStencil,
1258 is_layered, all_rects_same_layers,
1259 rectCount, pRects);
1260 }
1261 }
1262
1263 v3dv_cmd_buffer_resume_occlusion_query(cmd_buffer);
1264 }
1265