/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Derived from tu_pipeline.c which is:
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "panvk_cs.h"
#include "panvk_private.h"

#include "nir/nir.h"
#include "nir/nir_builder.h"
#include "spirv/nir_spirv.h"
#include "util/blend.h"
#include "util/mesa-sha1.h"
#include "util/u_atomic.h"
#include "util/u_debug.h"
#include "vk_blend.h"
#include "vk_format.h"
#include "vk_util.h"

#include "panfrost/util/pan_lower_framebuffer.h"

struct panvk_pipeline_builder {
   struct panvk_device *device;
   struct panvk_pipeline_cache *cache;
   const VkAllocationCallbacks *alloc;
   struct {
      const VkGraphicsPipelineCreateInfo *gfx;
      const VkComputePipelineCreateInfo *compute;
   } create_info;
   const struct panvk_pipeline_layout *layout;

   struct panvk_shader *shaders[MESA_SHADER_STAGES];
   struct {
      uint32_t shader_offset;
      uint32_t rsd_offset;
   } stages[MESA_SHADER_STAGES];
   uint32_t blend_shader_offsets[MAX_RTS];
   uint32_t shader_total_size;
   uint32_t static_state_size;
   uint32_t vpd_offset;

   bool rasterizer_discard;
   /* these states are affected by rasterizer_discard */
   VkSampleCountFlagBits samples;
   bool use_depth_stencil_attachment;
   uint8_t active_color_attachments;
   enum pipe_format color_attachment_formats[MAX_RTS];
};

static VkResult
panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
                                       struct panvk_pipeline **out_pipeline)
{
   struct panvk_device *dev = builder->device;

   struct panvk_pipeline *pipeline = vk_object_zalloc(
      &dev->vk, builder->alloc, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
   if (!pipeline)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   pipeline->layout = builder->layout;
   *out_pipeline = pipeline;
   return VK_SUCCESS;
}

static void
panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
{
   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!builder->shaders[i])
         continue;
      panvk_shader_destroy(builder->device, builder->shaders[i],
                           builder->alloc);
   }
}

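/* A piece of state is "static" if it isn't part of the pipeline's dynamic
 * state mask, i.e. it was baked in at pipeline creation time.
 */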
static bool
panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
{
   return !(pipeline->dynamic_state_mask & (1 << id));
}

static VkResult
panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
                                       struct panvk_pipeline *pipeline)
{
   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
      NULL};
   const VkPipelineShaderStageCreateInfo *stages =
      builder->create_info.gfx ? builder->create_info.gfx->pStages
                               : &builder->create_info.compute->stage;
   unsigned stage_count =
      builder->create_info.gfx ? builder->create_info.gfx->stageCount : 1;

   for (uint32_t i = 0; i < stage_count; i++) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(stages[i].stage);
      stage_infos[stage] = &stages[i];
   }

   /* compile shaders in reverse order */
   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
        stage > MESA_SHADER_NONE; stage--) {
      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
      if (!stage_info)
         continue;

      struct panvk_shader *shader;

      shader = panvk_per_arch(shader_create)(
         builder->device, stage, stage_info, builder->layout,
         PANVK_SYSVAL_UBO_INDEX, &pipeline->blend.state,
         panvk_pipeline_static_state(pipeline,
                                     VK_DYNAMIC_STATE_BLEND_CONSTANTS),
         builder->alloc);
      if (!shader)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      builder->shaders[stage] = shader;
      builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
      builder->stages[stage].shader_offset = builder->shader_total_size;
      builder->shader_total_size +=
         util_dynarray_num_elements(&shader->binary, uint8_t);
   }

   return VK_SUCCESS;
}

static VkResult
panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
                                      struct panvk_pipeline *pipeline)
{
   /* In some cases, the optimized shader is empty. Don't bother allocating
    * anything in this case.
    */
   if (builder->shader_total_size == 0)
      return VK_SUCCESS;

   struct panvk_priv_bo *bin_bo = panvk_priv_bo_create(
      builder->device, builder->shader_total_size, PAN_KMOD_BO_FLAG_EXECUTABLE,
      NULL, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   pipeline->binary_bo = bin_bo;

   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct panvk_shader *shader = builder->shaders[i];
      if (!shader)
         continue;

      memcpy(pipeline->binary_bo->addr.host + builder->stages[i].shader_offset,
             util_dynarray_element(&shader->binary, uint8_t, 0),
             util_dynarray_num_elements(&shader->binary, uint8_t));
   }

   return VK_SUCCESS;
}

static void
panvk_pipeline_builder_alloc_static_state_bo(
   struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
{
   unsigned bo_size = 0;

   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct panvk_shader *shader = builder->shaders[i];
      if (!shader && i != MESA_SHADER_FRAGMENT)
         continue;

      if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
         continue;

      bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
      builder->stages[i].rsd_offset = bo_size;
      bo_size += pan_size(RENDERER_STATE);
      if (i == MESA_SHADER_FRAGMENT)
         bo_size += pan_size(BLEND) * MAX2(pipeline->blend.state.rt_count, 1);
   }

   if (builder->create_info.gfx &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
      bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
      builder->vpd_offset = bo_size;
      bo_size += pan_size(VIEWPORT);
   }

   if (bo_size) {
      pipeline->state_bo = panvk_priv_bo_create(
         builder->device, bo_size, 0, NULL, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   }
}

static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
                                    struct panvk_pipeline *pipeline,
                                    gl_shader_stage stage)
{
   const struct panvk_shader *shader = builder->shaders[stage];

   pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
}

static void
panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
                                    struct panvk_pipeline *pipeline)
{
   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct panvk_shader *shader = builder->shaders[i];
      if (!shader)
         continue;

      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
      pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);

      if (shader->has_img_access)
         pipeline->img_access_mask |= BITFIELD_BIT(i);

      if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size) {
         VkPrimitiveTopology topology =
            builder->create_info.gfx->pInputAssemblyState->topology;
         bool points = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);

         /* Even if the vertex shader writes point size, we only consider the
          * pipeline to write point size when we're actually drawing points.
          * Otherwise the point size write would conflict with wide lines.
          */
         pipeline->ia.writes_point_size = points;
      }

      mali_ptr shader_ptr = 0;

      /* Handle empty shaders gracefully */
      if (util_dynarray_num_elements(&builder->shaders[i]->binary, uint8_t)) {
         shader_ptr =
            pipeline->binary_bo->addr.dev + builder->stages[i].shader_offset;
      }

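      /* The fragment shader's RSD depends on blend/ZS state and may have to
       * stay patchable, so it's handled separately below.
       */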
      if (i != MESA_SHADER_FRAGMENT) {
         void *rsd =
            pipeline->state_bo->addr.host + builder->stages[i].rsd_offset;
         mali_ptr gpu_rsd =
            pipeline->state_bo->addr.dev + builder->stages[i].rsd_offset;

         panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info,
                                         shader_ptr, rsd);
         pipeline->rsds[i] = gpu_rsd;
      }

      panvk_pipeline_builder_init_sysvals(builder, pipeline, i);

      if (i == MESA_SHADER_COMPUTE)
         pipeline->cs.local_size = shader->local_size;
   }

   if (builder->create_info.gfx && !pipeline->fs.dynamic_rsd) {
      void *rsd = pipeline->state_bo->addr.host +
                  builder->stages[MESA_SHADER_FRAGMENT].rsd_offset;
      mali_ptr gpu_rsd = pipeline->state_bo->addr.dev +
                         builder->stages[MESA_SHADER_FRAGMENT].rsd_offset;
      void *bd = rsd + pan_size(RENDERER_STATE);

      panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd);
      for (unsigned rt = 0; rt < pipeline->blend.state.rt_count; rt++) {
         panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd);
         bd += pan_size(BLEND);
      }

      pipeline->rsds[MESA_SHADER_FRAGMENT] = gpu_rsd;
   } else if (builder->create_info.gfx) {
      panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline,
                                       &pipeline->fs.rsd_template);
      for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1);
           rt++) {
         panvk_per_arch(emit_blend)(builder->device, pipeline, rt,
                                    &pipeline->blend.bd_template[rt]);
      }
   }

   pipeline->num_ubos = PANVK_NUM_BUILTIN_UBOS + builder->layout->num_ubos +
                        builder->layout->num_dyn_ubos;
}

static void
panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
                                      struct panvk_pipeline *pipeline)
{
   /* The spec says:
    *
    *    pViewportState is a pointer to an instance of the
    *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
    *    pipeline has rasterization disabled.
    */
   if (!builder->rasterizer_discard &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
      void *vpd = pipeline->state_bo->addr.host + builder->vpd_offset;
      panvk_per_arch(emit_viewport)(
         builder->create_info.gfx->pViewportState->pViewports,
         builder->create_info.gfx->pViewportState->pScissors, vpd);
      pipeline->vpd = pipeline->state_bo->addr.dev + builder->vpd_offset;
   }

   if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT))
      pipeline->viewport =
         builder->create_info.gfx->pViewportState->pViewports[0];

   if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR))
      pipeline->scissor =
         builder->create_info.gfx->pViewportState->pScissors[0];
}

static void
panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder,
                                     struct panvk_pipeline *pipeline)
{
   const VkPipelineDynamicStateCreateInfo *dynamic_info =
      builder->create_info.gfx->pDynamicState;

   if (!dynamic_info)
      return;

   for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
      VkDynamicState state = dynamic_info->pDynamicStates[i];
      switch (state) {
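      /* VIEWPORT..STENCIL_REFERENCE covers all of the original Vulkan 1.0
       * dynamic states, so they all fit in the 32-bit mask.
       */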
      case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
         pipeline->dynamic_state_mask |= 1 << state;
         break;
      default:
         unreachable("unsupported dynamic state");
      }
   }
}

static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)
{
   switch (in) {
   case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
      return MALI_DRAW_MODE_POINTS;
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      return MALI_DRAW_MODE_LINES;
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      return MALI_DRAW_MODE_LINE_STRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      return MALI_DRAW_MODE_TRIANGLES;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      return MALI_DRAW_MODE_TRIANGLE_STRIP;
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      return MALI_DRAW_MODE_TRIANGLE_FAN;
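   /* Adjacency and patch primitives require geometry/tessellation shaders,
    * which panvk doesn't support.
    */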
   case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
   case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
   default:
      unreachable("Invalid primitive type");
   }
}

static void
panvk_pipeline_builder_parse_input_assembly(
   struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
{
   pipeline->ia.primitive_restart =
      builder->create_info.gfx->pInputAssemblyState->primitiveRestartEnable;
   pipeline->ia.topology = translate_prim_topology(
      builder->create_info.gfx->pInputAssemblyState->topology);
}

bool
panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
                                     const struct pan_blend_state *state,
                                     unsigned rt)
{
   /* LogicOp requires a blend shader */
   if (state->logicop_enable)
      return true;

   /* Not all formats can be blended by fixed-function hardware */
   if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal)
      return true;

   unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation);

   /* v6 doesn't support blend constants in FF blend equations.
    * v7 only uses the constant from RT 0 (TODO: what if it's the same
    * constant? or a constant is shared?)
    */
   if (constant_mask && (PAN_ARCH == 6 || (PAN_ARCH == 7 && rt > 0)))
      return true;

   if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
      return true;

   unsigned arch = pan_arch(dev->physical_device->kmod.props.gpu_prod_id);
   bool supports_2src = pan_blend_supports_2src(arch);
   return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src);
}

static void
panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder,
                                         struct panvk_pipeline *pipeline)
{
   pipeline->blend.state.logicop_enable =
      builder->create_info.gfx->pColorBlendState->logicOpEnable;
   pipeline->blend.state.logicop_func =
      vk_logic_op_to_pipe(builder->create_info.gfx->pColorBlendState->logicOp);
   pipeline->blend.state.rt_count =
      util_last_bit(builder->active_color_attachments);
   memcpy(pipeline->blend.state.constants,
          builder->create_info.gfx->pColorBlendState->blendConstants,
          sizeof(pipeline->blend.state.constants));

   for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
      const VkPipelineColorBlendAttachmentState *in =
         &builder->create_info.gfx->pColorBlendState->pAttachments[i];
      struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i];

      out->format = builder->color_attachment_formats[i];

      bool dest_has_alpha = util_format_has_alpha(out->format);

      out->nr_samples =
         builder->create_info.gfx->pMultisampleState->rasterizationSamples;
      out->equation.blend_enable = in->blendEnable;
      out->equation.color_mask = in->colorWriteMask;
      out->equation.rgb_func = vk_blend_op_to_pipe(in->colorBlendOp);
      out->equation.rgb_src_factor =
         vk_blend_factor_to_pipe(in->srcColorBlendFactor);
      out->equation.rgb_dst_factor =
         vk_blend_factor_to_pipe(in->dstColorBlendFactor);
      out->equation.alpha_func = vk_blend_op_to_pipe(in->alphaBlendOp);
      out->equation.alpha_src_factor =
         vk_blend_factor_to_pipe(in->srcAlphaBlendFactor);
      out->equation.alpha_dst_factor =
         vk_blend_factor_to_pipe(in->dstAlphaBlendFactor);

      if (!dest_has_alpha) {
         out->equation.rgb_src_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_src_factor);
         out->equation.rgb_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_dst_factor);

         out->equation.alpha_src_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_src_factor);
         out->equation.alpha_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_dst_factor);
      }

      pipeline->blend.reads_dest |= pan_blend_reads_dest(out->equation);

      unsigned constant_mask = panvk_per_arch(blend_needs_lowering)(
                                  builder->device, &pipeline->blend.state, i)
                                  ? 0
                                  : pan_blend_constant_mask(out->equation);
      pipeline->blend.constant[i].index = ffs(constant_mask) - 1;
      if (constant_mask) {
         /* On Bifrost, the blend constant is expressed as a UNORM value
          * sized to match the target format, then shifted so that the used
          * bits sit in the MSBs. We compute the factor at pipeline creation
          * time so that descriptor emission only has to do:
          *
          *    hw_constant = float_constant * factor;
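          *
          * For example, an 8-bit UNORM channel gives
          * factor = 0xFF << 8 = 0xFF00.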
          */
         const struct util_format_description *format_desc =
            util_format_description(out->format);
         unsigned chan_size = 0;
         for (unsigned c = 0; c < format_desc->nr_channels; c++)
            chan_size = MAX2(format_desc->channel[c].size, chan_size);
         pipeline->blend.constant[i].bifrost_factor = ((1 << chan_size) - 1)
                                                      << (16 - chan_size);
      }
   }
}

static void
panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
                                         struct panvk_pipeline *pipeline)
{
   unsigned nr_samples = MAX2(
      builder->create_info.gfx->pMultisampleState->rasterizationSamples, 1);

   pipeline->ms.rast_samples =
      builder->create_info.gfx->pMultisampleState->rasterizationSamples;
   pipeline->ms.sample_mask =
      builder->create_info.gfx->pMultisampleState->pSampleMask
         ? builder->create_info.gfx->pMultisampleState->pSampleMask[0]
         : UINT16_MAX;
   pipeline->ms.min_samples =
      MAX2(builder->create_info.gfx->pMultisampleState->minSampleShading *
              nr_samples,
           1);
}

static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)
{
   switch (in) {
   case VK_STENCIL_OP_KEEP:
      return MALI_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:
      return MALI_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return MALI_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_INCR_SAT;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return MALI_STENCIL_OP_DECR_SAT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return MALI_STENCIL_OP_INCR_WRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return MALI_STENCIL_OP_DECR_WRAP;
   case VK_STENCIL_OP_INVERT:
      return MALI_STENCIL_OP_INVERT;
   default:
      unreachable("Invalid stencil op");
   }
}

static void
panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
                                struct panvk_pipeline *pipeline)
{
   if (!builder->use_depth_stencil_attachment)
      return;

   pipeline->zs.z_test =
      builder->create_info.gfx->pDepthStencilState->depthTestEnable;

   /* The Vulkan spec says:
    *
    *    depthWriteEnable controls whether depth writes are enabled when
    *    depthTestEnable is VK_TRUE. Depth writes are always disabled when
    *    depthTestEnable is VK_FALSE.
    *
    * The hardware does not make this distinction, though, so we AND in the
    * condition ourselves.
    */
   pipeline->zs.z_write =
      pipeline->zs.z_test &&
      builder->create_info.gfx->pDepthStencilState->depthWriteEnable;

   pipeline->zs.z_compare_func = panvk_per_arch(translate_compare_func)(
      builder->create_info.gfx->pDepthStencilState->depthCompareOp);
   pipeline->zs.s_test =
      builder->create_info.gfx->pDepthStencilState->stencilTestEnable;
   pipeline->zs.s_front.fail_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->front.failOp);
   pipeline->zs.s_front.pass_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->front.passOp);
   pipeline->zs.s_front.z_fail_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->front.depthFailOp);
   pipeline->zs.s_front.compare_func = panvk_per_arch(translate_compare_func)(
      builder->create_info.gfx->pDepthStencilState->front.compareOp);
   pipeline->zs.s_front.compare_mask =
      builder->create_info.gfx->pDepthStencilState->front.compareMask;
   pipeline->zs.s_front.write_mask =
      builder->create_info.gfx->pDepthStencilState->front.writeMask;
   pipeline->zs.s_front.ref =
      builder->create_info.gfx->pDepthStencilState->front.reference;
   pipeline->zs.s_back.fail_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->back.failOp);
   pipeline->zs.s_back.pass_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->back.passOp);
   pipeline->zs.s_back.z_fail_op = translate_stencil_op(
      builder->create_info.gfx->pDepthStencilState->back.depthFailOp);
   pipeline->zs.s_back.compare_func = panvk_per_arch(translate_compare_func)(
      builder->create_info.gfx->pDepthStencilState->back.compareOp);
   pipeline->zs.s_back.compare_mask =
      builder->create_info.gfx->pDepthStencilState->back.compareMask;
   pipeline->zs.s_back.write_mask =
      builder->create_info.gfx->pDepthStencilState->back.writeMask;
   pipeline->zs.s_back.ref =
      builder->create_info.gfx->pDepthStencilState->back.reference;
}

static void
panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
                                  struct panvk_pipeline *pipeline)
{
   pipeline->rast.clamp_depth =
      builder->create_info.gfx->pRasterizationState->depthClampEnable;
   pipeline->rast.depth_bias.enable =
      builder->create_info.gfx->pRasterizationState->depthBiasEnable;
   pipeline->rast.depth_bias.constant_factor =
      builder->create_info.gfx->pRasterizationState->depthBiasConstantFactor;
   pipeline->rast.depth_bias.clamp =
      builder->create_info.gfx->pRasterizationState->depthBiasClamp;
   pipeline->rast.depth_bias.slope_factor =
      builder->create_info.gfx->pRasterizationState->depthBiasSlopeFactor;
   pipeline->rast.front_ccw =
      builder->create_info.gfx->pRasterizationState->frontFace ==
      VK_FRONT_FACE_COUNTER_CLOCKWISE;
   pipeline->rast.cull_front_face =
      builder->create_info.gfx->pRasterizationState->cullMode &
      VK_CULL_MODE_FRONT_BIT;
   pipeline->rast.cull_back_face =
      builder->create_info.gfx->pRasterizationState->cullMode &
      VK_CULL_MODE_BACK_BIT;
   pipeline->rast.line_width =
      builder->create_info.gfx->pRasterizationState->lineWidth;
   pipeline->rast.enable =
      !builder->create_info.gfx->pRasterizationState->rasterizerDiscardEnable;
}

static bool
panvk_fs_required(struct panvk_pipeline *pipeline)
{
   const struct pan_shader_info *info = &pipeline->fs.info;

   /* If we generally have side effects */
   if (info->fs.sidefx)
      return true;

   /* If colour is written we need to execute */
   const struct pan_blend_state *blend = &pipeline->blend.state;
   for (unsigned i = 0; i < blend->rt_count; ++i) {
      if (blend->rts[i].equation.color_mask)
         return true;
   }

   /* If depth is written and not implied we need to execute.
    * TODO: Predicate on Z/S writes being enabled */
   return (info->fs.writes_depth || info->fs.writes_stencil);
}

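/* Dynamic states that feed into the fragment RSD. If any of them is dynamic,
 * the RSD can't be baked at pipeline creation time; we keep a template
 * instead and patch it at draw time.
 */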
#define PANVK_DYNAMIC_FS_RSD_MASK                                              \
   ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) |                                       \
    (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) |                                  \
    (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) |                             \
    (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) |                               \
    (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))

static void
panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
                                     struct panvk_pipeline *pipeline)
{
   if (!builder->shaders[MESA_SHADER_FRAGMENT])
      return;

   pipeline->fs.dynamic_rsd =
      pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
   pipeline->fs.address = pipeline->binary_bo->addr.dev +
                          builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
   pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
   pipeline->fs.rt_mask = builder->active_color_attachments;
   pipeline->fs.required = panvk_fs_required(pipeline);
}

static void
panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
                                   gl_shader_stage stage,
                                   const struct pan_shader_varying *varying,
                                   bool input)
{
   gl_varying_slot loc = varying->location;
   enum panvk_varying_buf_id buf_id = panvk_varying_buf_id(loc);

   varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;

   assert(loc < ARRAY_SIZE(varyings->varying));

   enum pipe_format new_fmt = varying->format;
   enum pipe_format old_fmt = varyings->varying[loc].format;

   BITSET_SET(varyings->active, loc);

   /* We expect inputs to either be set by a previous stage or be
    * built-in. Skip the entry if that's not the case; we'll emit a
    * constant varying returning zero for those entries.
    */
   if (input && old_fmt == PIPE_FORMAT_NONE)
      return;

   unsigned new_size = util_format_get_blocksize(new_fmt);
   unsigned old_size = util_format_get_blocksize(old_fmt);

   if (old_size < new_size)
      varyings->varying[loc].format = new_fmt;

   /* Type (float or not) information is only known in the fragment shader, so
    * override for that
    */
   if (input) {
      assert(stage == MESA_SHADER_FRAGMENT && "no geom/tess on Bifrost");
      varyings->varying[loc].format = new_fmt;
   }

   varyings->buf_mask |= 1 << buf_id;
}

static void
panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
                                        struct panvk_pipeline *pipeline)
{
   for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!builder->shaders[s])
         continue;

      const struct pan_shader_info *info = &builder->shaders[s]->info;

      for (unsigned i = 0; i < info->varyings.input_count; i++) {
         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
                                            &info->varyings.input[i], true);
      }

      for (unsigned i = 0; i < info->varyings.output_count; i++) {
         panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
                                            &info->varyings.output[i], false);
      }
   }

   /* TODO: Xfb */
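   /* Assign each active varying an offset in its buffer, accumulating the
    * per-buffer strides as we go.
    */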
   gl_varying_slot loc;
   BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
      if (pipeline->varyings.varying[loc].format == PIPE_FORMAT_NONE)
         continue;

      enum panvk_varying_buf_id buf_id = panvk_varying_buf_id(loc);
      unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
      unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);

      pipeline->varyings.varying[loc].buf = buf_idx;
      pipeline->varyings.varying[loc].offset =
         pipeline->varyings.buf[buf_idx].stride;
      pipeline->varyings.buf[buf_idx].stride += varying_sz;
   }
}

static void
panvk_pipeline_builder_parse_vertex_input(
   struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
{
   struct panvk_attribs_info *attribs = &pipeline->attribs;
   const VkPipelineVertexInputStateCreateInfo *info =
      builder->create_info.gfx->pVertexInputState;

   const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_info =
      vk_find_struct_const(info->pNext,
                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

   for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &info->pVertexBindingDescriptions[i];
      attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
      attribs->buf[desc->binding].stride = desc->stride;
      attribs->buf[desc->binding].per_instance =
         desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE;
      attribs->buf[desc->binding].instance_divisor = 1;
      attribs->buf[desc->binding].special = false;
   }

   if (div_info) {
      for (unsigned i = 0; i < div_info->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *div =
            &div_info->pVertexBindingDivisors[i];
         attribs->buf[div->binding].instance_divisor = div->divisor;
      }
   }

   const struct pan_shader_info *vs =
      &builder->shaders[MESA_SHADER_VERTEX]->info;

   for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];

      unsigned attrib = desc->location + VERT_ATTRIB_GENERIC0;
      unsigned slot =
         util_bitcount64(vs->attributes_read & BITFIELD64_MASK(attrib));

      attribs->attrib[slot].buf = desc->binding;
      attribs->attrib[slot].format = vk_format_to_pipe_format(desc->format);
      attribs->attrib[slot].offset = desc->offset;
   }

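   /* gl_VertexID/gl_InstanceID live at fixed attribute slots
    * (PAN_VERTEX_ID/PAN_INSTANCE_ID); when the shader uses them, back those
    * slots with "special" attribute buffers.
    */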
   if (vs->attribute_count >= PAN_VERTEX_ID) {
      attribs->buf[attribs->buf_count].special = true;
      attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
      attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
      attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
   }

   if (vs->attribute_count >= PAN_INSTANCE_ID) {
      attribs->buf[attribs->buf_count].special = true;
      attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
      attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
      attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
   }

   attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
}

static VkResult
panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
                             struct panvk_pipeline **pipeline)
{
   VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
   if (result != VK_SUCCESS)
      return result;

   /* TODO: make those functions return a result and handle errors */
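   /* Ordering matters below: blend state is parsed before the shaders are
    * compiled because the fragment shader compile consumes it, and the FS
    * state is initialized before the static state BO is allocated because
    * fs.dynamic_rsd affects the BO layout.
    */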
   if (builder->create_info.gfx) {
      panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
      panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
      panvk_pipeline_builder_compile_shaders(builder, *pipeline);
      panvk_pipeline_builder_collect_varyings(builder, *pipeline);
      panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
      panvk_pipeline_builder_parse_multisample(builder, *pipeline);
      panvk_pipeline_builder_parse_zs(builder, *pipeline);
      panvk_pipeline_builder_parse_rast(builder, *pipeline);
      panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
      panvk_pipeline_builder_upload_shaders(builder, *pipeline);
      panvk_pipeline_builder_init_fs_state(builder, *pipeline);
      panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
      panvk_pipeline_builder_init_shaders(builder, *pipeline);
      panvk_pipeline_builder_parse_viewport(builder, *pipeline);
   } else {
      panvk_pipeline_builder_compile_shaders(builder, *pipeline);
      panvk_pipeline_builder_upload_shaders(builder, *pipeline);
      panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
      panvk_pipeline_builder_init_shaders(builder, *pipeline);
   }

   return VK_SUCCESS;
}

static void
panvk_pipeline_builder_init_graphics(
   struct panvk_pipeline_builder *builder, struct panvk_device *dev,
   struct panvk_pipeline_cache *cache,
   const VkGraphicsPipelineCreateInfo *create_info,
   const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
   assert(layout);
   *builder = (struct panvk_pipeline_builder){
      .device = dev,
      .cache = cache,
      .layout = layout,
      .create_info.gfx = create_info,
      .alloc = alloc,
   };

   builder->rasterizer_discard =
      create_info->pRasterizationState->rasterizerDiscardEnable;

   if (builder->rasterizer_discard) {
      builder->samples = VK_SAMPLE_COUNT_1_BIT;
   } else {
      builder->samples = create_info->pMultisampleState->rasterizationSamples;

      const struct panvk_render_pass *pass =
         panvk_render_pass_from_handle(create_info->renderPass);
      const struct panvk_subpass *subpass =
         &pass->subpasses[create_info->subpass];

      builder->use_depth_stencil_attachment =
         subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;

      assert(subpass->color_count <=
             create_info->pColorBlendState->attachmentCount);
      builder->active_color_attachments = 0;
      for (uint32_t i = 0; i < subpass->color_count; i++) {
         uint32_t idx = subpass->color_attachments[i].idx;
         if (idx == VK_ATTACHMENT_UNUSED)
            continue;

         builder->active_color_attachments |= 1 << i;
         builder->color_attachment_formats[i] = pass->attachments[idx].format;
      }
   }
}

VkResult
panvk_per_arch(CreateGraphicsPipelines)(
   VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
   const VkGraphicsPipelineCreateInfo *pCreateInfos,
   const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(panvk_device, dev, device);
   VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);

   for (uint32_t i = 0; i < count; i++) {
      struct panvk_pipeline_builder builder;
      panvk_pipeline_builder_init_graphics(&builder, dev, cache,
                                           &pCreateInfos[i], pAllocator);

      struct panvk_pipeline *pipeline;
      VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
      panvk_pipeline_builder_finish(&builder);

      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
            pPipelines[j] = VK_NULL_HANDLE;
         }

         return result;
      }

      pPipelines[i] = panvk_pipeline_to_handle(pipeline);
   }

   return VK_SUCCESS;
}

static void
panvk_pipeline_builder_init_compute(
   struct panvk_pipeline_builder *builder, struct panvk_device *dev,
   struct panvk_pipeline_cache *cache,
   const VkComputePipelineCreateInfo *create_info,
   const VkAllocationCallbacks *alloc)
{
   VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
   assert(layout);
   *builder = (struct panvk_pipeline_builder){
      .device = dev,
      .cache = cache,
      .layout = layout,
      .create_info.compute = create_info,
      .alloc = alloc,
   };
}

VkResult
panvk_per_arch(CreateComputePipelines)(
   VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
   const VkComputePipelineCreateInfo *pCreateInfos,
   const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
{
   VK_FROM_HANDLE(panvk_device, dev, device);
   VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);

   for (uint32_t i = 0; i < count; i++) {
      struct panvk_pipeline_builder builder;
      panvk_pipeline_builder_init_compute(&builder, dev, cache,
                                          &pCreateInfos[i], pAllocator);

      struct panvk_pipeline *pipeline;
      VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
      panvk_pipeline_builder_finish(&builder);

      if (result != VK_SUCCESS) {
         for (uint32_t j = 0; j < i; j++) {
            panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
            pPipelines[j] = VK_NULL_HANDLE;
         }

         return result;
      }

      pPipelines[i] = panvk_pipeline_to_handle(pipeline);
   }

   return VK_SUCCESS;
}