• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Collabora Ltd.
3  *
4  * Derived from tu_pipeline.c which is:
5  * Copyright © 2016 Red Hat.
6  * Copyright © 2016 Bas Nieuwenhuizen
7  * Copyright © 2015 Intel Corporation
8  *
9  * Permission is hereby granted, free of charge, to any person obtaining a
10  * copy of this software and associated documentation files (the "Software"),
11  * to deal in the Software without restriction, including without limitation
12  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13  * and/or sell copies of the Software, and to permit persons to whom the
14  * Software is furnished to do so, subject to the following conditions:
15  *
16  * The above copyright notice and this permission notice (including the next
17  * paragraph) shall be included in all copies or substantial portions of the
18  * Software.
19  *
20  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
23  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26  * DEALINGS IN THE SOFTWARE.
27  */
28 
29 #include "panvk_cs.h"
30 #include "panvk_private.h"
31 
32 #include "nir/nir.h"
33 #include "nir/nir_builder.h"
34 #include "spirv/nir_spirv.h"
35 #include "util/blend.h"
36 #include "util/mesa-sha1.h"
37 #include "util/u_atomic.h"
38 #include "util/u_debug.h"
39 #include "vk_blend.h"
40 #include "vk_format.h"
41 #include "vk_util.h"
42 
43 #include "panfrost/util/pan_lower_framebuffer.h"
44 
/* Transient state used while turning a VkGraphicsPipelineCreateInfo or
 * VkComputePipelineCreateInfo into a panvk_pipeline.
 */
struct panvk_pipeline_builder {
   struct panvk_device *device;
   struct panvk_pipeline_cache *cache;
   const VkAllocationCallbacks *alloc;
   /* Exactly one of these is set, depending on the pipeline type. */
   struct {
      const VkGraphicsPipelineCreateInfo *gfx;
      const VkComputePipelineCreateInfo *compute;
   } create_info;
   const struct panvk_pipeline_layout *layout;

   struct panvk_shader *shaders[MESA_SHADER_STAGES];
   struct {
      uint32_t shader_offset; /* stage binary offset in the binary BO */
      uint32_t rsd_offset;    /* stage renderer-state offset in the state BO */
   } stages[MESA_SHADER_STAGES];
   uint32_t blend_shader_offsets[MAX_RTS];
   uint32_t shader_total_size; /* total bytes of all packed shader binaries */
   uint32_t static_state_size;
   uint32_t vpd_offset; /* viewport descriptor offset in the state BO */

   bool rasterizer_discard;
   /* these states are affected by rasterizer_discard */
   VkSampleCountFlagBits samples;
   bool use_depth_stencil_attachment;
   uint8_t active_color_attachments;
   enum pipe_format color_attachment_formats[MAX_RTS];
};
72 
73 static VkResult
panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder * builder,struct panvk_pipeline ** out_pipeline)74 panvk_pipeline_builder_create_pipeline(struct panvk_pipeline_builder *builder,
75                                        struct panvk_pipeline **out_pipeline)
76 {
77    struct panvk_device *dev = builder->device;
78 
79    struct panvk_pipeline *pipeline = vk_object_zalloc(
80       &dev->vk, builder->alloc, sizeof(*pipeline), VK_OBJECT_TYPE_PIPELINE);
81    if (!pipeline)
82       return VK_ERROR_OUT_OF_HOST_MEMORY;
83 
84    pipeline->layout = builder->layout;
85    *out_pipeline = pipeline;
86    return VK_SUCCESS;
87 }
88 
89 static void
panvk_pipeline_builder_finish(struct panvk_pipeline_builder * builder)90 panvk_pipeline_builder_finish(struct panvk_pipeline_builder *builder)
91 {
92    for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
93       if (!builder->shaders[i])
94          continue;
95       panvk_shader_destroy(builder->device, builder->shaders[i],
96                            builder->alloc);
97    }
98 }
99 
100 static bool
panvk_pipeline_static_state(struct panvk_pipeline * pipeline,uint32_t id)101 panvk_pipeline_static_state(struct panvk_pipeline *pipeline, uint32_t id)
102 {
103    return !(pipeline->dynamic_state_mask & (1 << id));
104 }
105 
/* Compile every stage referenced by the create info into a panvk_shader,
 * and record each stage's 128-byte-aligned offset in the (future) binary BO.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if any shader fails to compile.
 */
static VkResult
panvk_pipeline_builder_compile_shaders(struct panvk_pipeline_builder *builder,
                                       struct panvk_pipeline *pipeline)
{
   const VkPipelineShaderStageCreateInfo *stage_infos[MESA_SHADER_STAGES] = {
      NULL};
   /* Graphics pipelines provide an array of stageCount stages; compute
    * pipelines embed a single stage in the create info. */
   const VkPipelineShaderStageCreateInfo *stages =
      builder->create_info.gfx ? builder->create_info.gfx->pStages
                               : &builder->create_info.compute->stage;
   unsigned stage_count =
      builder->create_info.gfx ? builder->create_info.gfx->stageCount : 1;

   /* Re-index the stage infos by gl_shader_stage. */
   for (uint32_t i = 0; i < stage_count; i++) {
      gl_shader_stage stage = vk_to_mesa_shader_stage(stages[i].stage);
      stage_infos[stage] = &stages[i];
   }

   /* compile shaders in reverse order */
   for (gl_shader_stage stage = MESA_SHADER_STAGES - 1;
        stage > MESA_SHADER_NONE; stage--) {
      const VkPipelineShaderStageCreateInfo *stage_info = stage_infos[stage];
      if (!stage_info)
         continue;

      struct panvk_shader *shader;

      shader = panvk_per_arch(shader_create)(
         builder->device, stage, stage_info, builder->layout,
         PANVK_SYSVAL_UBO_INDEX, &pipeline->blend.state,
         panvk_pipeline_static_state(pipeline,
                                     VK_DYNAMIC_STATE_BLEND_CONSTANTS),
         builder->alloc);
      if (!shader)
         return VK_ERROR_OUT_OF_HOST_MEMORY;

      builder->shaders[stage] = shader;
      /* Binaries are packed back-to-back in one BO, each 128-byte aligned. */
      builder->shader_total_size = ALIGN_POT(builder->shader_total_size, 128);
      builder->stages[stage].shader_offset = builder->shader_total_size;
      builder->shader_total_size +=
         util_dynarray_num_elements(&shader->binary, uint8_t);
   }

   return VK_SUCCESS;
}
150 
151 static VkResult
panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)152 panvk_pipeline_builder_upload_shaders(struct panvk_pipeline_builder *builder,
153                                       struct panvk_pipeline *pipeline)
154 {
155    /* In some cases, the optimized shader is empty. Don't bother allocating
156     * anything in this case.
157     */
158    if (builder->shader_total_size == 0)
159       return VK_SUCCESS;
160 
161    struct panvk_priv_bo *bin_bo = panvk_priv_bo_create(
162       builder->device, builder->shader_total_size, PAN_KMOD_BO_FLAG_EXECUTABLE,
163       NULL, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
164 
165    pipeline->binary_bo = bin_bo;
166 
167    for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
168       const struct panvk_shader *shader = builder->shaders[i];
169       if (!shader)
170          continue;
171 
172       memcpy(pipeline->binary_bo->addr.host + builder->stages[i].shader_offset,
173              util_dynarray_element(&shader->binary, uint8_t, 0),
174              util_dynarray_num_elements(&shader->binary, uint8_t));
175    }
176 
177    return VK_SUCCESS;
178 }
179 
/* Size the static-state BO (per-stage renderer state descriptors, fragment
 * blend descriptors, and — when viewport/scissor are static — the viewport
 * descriptor), record each piece's offset in the builder, then allocate it.
 */
static void
panvk_pipeline_builder_alloc_static_state_bo(
   struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
{
   unsigned bo_size = 0;

   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct panvk_shader *shader = builder->shaders[i];
      /* The fragment stage gets an RSD even when no fragment shader was
       * provided. */
      if (!shader && i != MESA_SHADER_FRAGMENT)
         continue;

      /* A dynamic fragment RSD is emitted at draw time from a template
       * instead of living in the static-state BO. */
      if (pipeline->fs.dynamic_rsd && i == MESA_SHADER_FRAGMENT)
         continue;

      bo_size = ALIGN_POT(bo_size, pan_alignment(RENDERER_STATE));
      builder->stages[i].rsd_offset = bo_size;
      bo_size += pan_size(RENDERER_STATE);
      /* Blend descriptors immediately follow the fragment RSD; reserve at
       * least one even for zero render targets. */
      if (i == MESA_SHADER_FRAGMENT)
         bo_size += pan_size(BLEND) * MAX2(pipeline->blend.state.rt_count, 1);
   }

   if (builder->create_info.gfx &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
       panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
      bo_size = ALIGN_POT(bo_size, pan_alignment(VIEWPORT));
      builder->vpd_offset = bo_size;
      bo_size += pan_size(VIEWPORT);
   }

   if (bo_size) {
      pipeline->state_bo = panvk_priv_bo_create(
         builder->device, bo_size, 0, NULL, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   }
}
214 
215 static void
panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline,gl_shader_stage stage)216 panvk_pipeline_builder_init_sysvals(struct panvk_pipeline_builder *builder,
217                                     struct panvk_pipeline *pipeline,
218                                     gl_shader_stage stage)
219 {
220    const struct panvk_shader *shader = builder->shaders[stage];
221 
222    pipeline->sysvals[stage].ubo_idx = shader->sysval_ubo;
223 }
224 
/* Gather per-stage info into the pipeline (TLS/WLS sizes, image access,
 * point-size writes, compute local size) and emit the renderer state
 * descriptors: static RSDs go into the state BO, a dynamic fragment RSD is
 * captured as a template for draw-time emission.
 */
static void
panvk_pipeline_builder_init_shaders(struct panvk_pipeline_builder *builder,
                                    struct panvk_pipeline *pipeline)
{
   for (uint32_t i = 0; i < MESA_SHADER_STAGES; i++) {
      const struct panvk_shader *shader = builder->shaders[i];
      if (!shader)
         continue;

      /* Thread-local/workgroup-local storage is shared: keep the maximum
       * over all stages. */
      pipeline->tls_size = MAX2(pipeline->tls_size, shader->info.tls_size);
      pipeline->wls_size = MAX2(pipeline->wls_size, shader->info.wls_size);

      if (shader->has_img_access)
         pipeline->img_access_mask |= BITFIELD_BIT(i);

      if (i == MESA_SHADER_VERTEX && shader->info.vs.writes_point_size) {
         VkPrimitiveTopology topology =
            builder->create_info.gfx->pInputAssemblyState->topology;
         bool points = (topology == VK_PRIMITIVE_TOPOLOGY_POINT_LIST);

         /* Even if the vertex shader writes point size, we only consider the
          * pipeline to write point size when we're actually drawing points.
          * Otherwise the point size write would conflict with wide lines.
          */
         pipeline->ia.writes_point_size = points;
      }

      mali_ptr shader_ptr = 0;

      /* Handle empty shaders gracefully */
      if (util_dynarray_num_elements(&builder->shaders[i]->binary, uint8_t)) {
         shader_ptr =
            pipeline->binary_bo->addr.dev + builder->stages[i].shader_offset;
      }

      /* Non-fragment RSDs are always static; emit them now. The fragment
       * RSD is handled after the loop. */
      if (i != MESA_SHADER_FRAGMENT) {
         void *rsd =
            pipeline->state_bo->addr.host + builder->stages[i].rsd_offset;
         mali_ptr gpu_rsd =
            pipeline->state_bo->addr.dev + builder->stages[i].rsd_offset;

         panvk_per_arch(emit_non_fs_rsd)(builder->device, &shader->info,
                                         shader_ptr, rsd);
         pipeline->rsds[i] = gpu_rsd;
      }

      panvk_pipeline_builder_init_sysvals(builder, pipeline, i);

      if (i == MESA_SHADER_COMPUTE)
         pipeline->cs.local_size = shader->local_size;
   }

   if (builder->create_info.gfx && !pipeline->fs.dynamic_rsd) {
      /* Fully static fragment state: emit the RSD and the trailing blend
       * descriptors straight into the state BO. */
      void *rsd = pipeline->state_bo->addr.host +
                  builder->stages[MESA_SHADER_FRAGMENT].rsd_offset;
      mali_ptr gpu_rsd = pipeline->state_bo->addr.dev +
                         builder->stages[MESA_SHADER_FRAGMENT].rsd_offset;
      void *bd = rsd + pan_size(RENDERER_STATE);

      panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline, rsd);
      for (unsigned rt = 0; rt < pipeline->blend.state.rt_count; rt++) {
         panvk_per_arch(emit_blend)(builder->device, pipeline, rt, bd);
         bd += pan_size(BLEND);
      }

      pipeline->rsds[MESA_SHADER_FRAGMENT] = gpu_rsd;
   } else if (builder->create_info.gfx) {
      /* Dynamic fragment state: capture RSD/blend templates that get
       * patched and emitted at draw time. */
      panvk_per_arch(emit_base_fs_rsd)(builder->device, pipeline,
                                       &pipeline->fs.rsd_template);
      for (unsigned rt = 0; rt < MAX2(pipeline->blend.state.rt_count, 1);
           rt++) {
         panvk_per_arch(emit_blend)(builder->device, pipeline, rt,
                                    &pipeline->blend.bd_template[rt]);
      }
   }

   pipeline->num_ubos = PANVK_NUM_BUILTIN_UBOS + builder->layout->num_ubos +
                        builder->layout->num_dyn_ubos;
}
304 
305 static void
panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)306 panvk_pipeline_builder_parse_viewport(struct panvk_pipeline_builder *builder,
307                                       struct panvk_pipeline *pipeline)
308 {
309    /* The spec says:
310     *
311     *    pViewportState is a pointer to an instance of the
312     *    VkPipelineViewportStateCreateInfo structure, and is ignored if the
313     *    pipeline has rasterization disabled.
314     */
315    if (!builder->rasterizer_discard &&
316        panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT) &&
317        panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR)) {
318       void *vpd = pipeline->state_bo->addr.host + builder->vpd_offset;
319       panvk_per_arch(emit_viewport)(
320          builder->create_info.gfx->pViewportState->pViewports,
321          builder->create_info.gfx->pViewportState->pScissors, vpd);
322       pipeline->vpd = pipeline->state_bo->addr.dev + builder->vpd_offset;
323    }
324    if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_VIEWPORT))
325       pipeline->viewport =
326          builder->create_info.gfx->pViewportState->pViewports[0];
327 
328    if (panvk_pipeline_static_state(pipeline, VK_DYNAMIC_STATE_SCISSOR))
329       pipeline->scissor =
330          builder->create_info.gfx->pViewportState->pScissors[0];
331 }
332 
333 static void
panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)334 panvk_pipeline_builder_parse_dynamic(struct panvk_pipeline_builder *builder,
335                                      struct panvk_pipeline *pipeline)
336 {
337    const VkPipelineDynamicStateCreateInfo *dynamic_info =
338       builder->create_info.gfx->pDynamicState;
339 
340    if (!dynamic_info)
341       return;
342 
343    for (uint32_t i = 0; i < dynamic_info->dynamicStateCount; i++) {
344       VkDynamicState state = dynamic_info->pDynamicStates[i];
345       switch (state) {
346       case VK_DYNAMIC_STATE_VIEWPORT ... VK_DYNAMIC_STATE_STENCIL_REFERENCE:
347          pipeline->dynamic_state_mask |= 1 << state;
348          break;
349       default:
350          unreachable("unsupported dynamic state");
351       }
352    }
353 }
354 
355 static enum mali_draw_mode
translate_prim_topology(VkPrimitiveTopology in)356 translate_prim_topology(VkPrimitiveTopology in)
357 {
358    switch (in) {
359    case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
360       return MALI_DRAW_MODE_POINTS;
361    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
362       return MALI_DRAW_MODE_LINES;
363    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
364       return MALI_DRAW_MODE_LINE_STRIP;
365    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
366       return MALI_DRAW_MODE_TRIANGLES;
367    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
368       return MALI_DRAW_MODE_TRIANGLE_STRIP;
369    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
370       return MALI_DRAW_MODE_TRIANGLE_FAN;
371    case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
372    case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
373    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
374    case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
375    case VK_PRIMITIVE_TOPOLOGY_PATCH_LIST:
376    default:
377       unreachable("Invalid primitive type");
378    }
379 }
380 
381 static void
panvk_pipeline_builder_parse_input_assembly(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)382 panvk_pipeline_builder_parse_input_assembly(
383    struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
384 {
385    pipeline->ia.primitive_restart =
386       builder->create_info.gfx->pInputAssemblyState->primitiveRestartEnable;
387    pipeline->ia.topology = translate_prim_topology(
388       builder->create_info.gfx->pInputAssemblyState->topology);
389 }
390 
/* Return true when fixed-function hardware blending cannot implement the
 * blend state of render target 'rt', and a blend shader is required instead.
 */
bool
panvk_per_arch(blend_needs_lowering)(const struct panvk_device *dev,
                                     const struct pan_blend_state *state,
                                     unsigned rt)
{
   /* LogicOp requires a blend shader */
   if (state->logicop_enable)
      return true;

   /* Not all formats can be blended by fixed-function hardware */
   if (!panfrost_blendable_formats_v7[state->rts[rt].format].internal)
      return true;

   unsigned constant_mask = pan_blend_constant_mask(state->rts[rt].equation);

   /* v6 doesn't support blend constants in FF blend equations.
    * v7 only uses the constant from RT 0 (TODO: what if it's the same
    * constant? or a constant is shared?)
    */
   if (constant_mask && (PAN_ARCH == 6 || (PAN_ARCH == 7 && rt > 0)))
      return true;

   /* All referenced blend constants must share one value for the
    * fixed-function path. */
   if (!pan_blend_is_homogenous_constant(constant_mask, state->constants))
      return true;

   /* Finally, the equation itself must be expressible in fixed function
    * (dual-source support depends on the GPU arch). */
   unsigned arch = pan_arch(dev->physical_device->kmod.props.gpu_prod_id);
   bool supports_2src = pan_blend_supports_2src(arch);
   return !pan_blend_can_fixed_function(state->rts[rt].equation, supports_2src);
}
420 
/* Translate VkPipelineColorBlendStateCreateInfo into the pan_blend_state
 * stored in the pipeline, and precompute per-RT blend-constant metadata.
 */
static void
panvk_pipeline_builder_parse_color_blend(struct panvk_pipeline_builder *builder,
                                         struct panvk_pipeline *pipeline)
{
   pipeline->blend.state.logicop_enable =
      builder->create_info.gfx->pColorBlendState->logicOpEnable;
   pipeline->blend.state.logicop_func =
      vk_logic_op_to_pipe(builder->create_info.gfx->pColorBlendState->logicOp);
   /* Only count up to the highest attachment actually in use. */
   pipeline->blend.state.rt_count =
      util_last_bit(builder->active_color_attachments);
   memcpy(pipeline->blend.state.constants,
          builder->create_info.gfx->pColorBlendState->blendConstants,
          sizeof(pipeline->blend.state.constants));

   for (unsigned i = 0; i < pipeline->blend.state.rt_count; i++) {
      const VkPipelineColorBlendAttachmentState *in =
         &builder->create_info.gfx->pColorBlendState->pAttachments[i];
      struct pan_blend_rt_state *out = &pipeline->blend.state.rts[i];

      out->format = builder->color_attachment_formats[i];

      bool dest_has_alpha = util_format_has_alpha(out->format);

      out->nr_samples =
         builder->create_info.gfx->pMultisampleState->rasterizationSamples;
      out->equation.blend_enable = in->blendEnable;
      out->equation.color_mask = in->colorWriteMask;
      out->equation.rgb_func = vk_blend_op_to_pipe(in->colorBlendOp);
      out->equation.rgb_src_factor =
         vk_blend_factor_to_pipe(in->srcColorBlendFactor);
      out->equation.rgb_dst_factor =
         vk_blend_factor_to_pipe(in->dstColorBlendFactor);
      out->equation.alpha_func = vk_blend_op_to_pipe(in->alphaBlendOp);
      out->equation.alpha_src_factor =
         vk_blend_factor_to_pipe(in->srcAlphaBlendFactor);
      out->equation.alpha_dst_factor =
         vk_blend_factor_to_pipe(in->dstAlphaBlendFactor);

      /* When the target format has no alpha channel, destination alpha
       * reads as 1.0, so fold DST_ALPHA factors into ONE/ZERO. */
      if (!dest_has_alpha) {
         out->equation.rgb_src_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_src_factor);
         out->equation.rgb_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.rgb_dst_factor);

         out->equation.alpha_src_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_src_factor);
         out->equation.alpha_dst_factor =
            util_blend_dst_alpha_to_one(out->equation.alpha_dst_factor);
      }

      pipeline->blend.reads_dest |= pan_blend_reads_dest(out->equation);

      /* Lowered blends don't use the HW constant slot, so treat the mask
       * as empty in that case. */
      unsigned constant_mask = panvk_per_arch(blend_needs_lowering)(
                                  builder->device, &pipeline->blend.state, i)
                                  ? 0
                                  : pan_blend_constant_mask(out->equation);
      pipeline->blend.constant[i].index = ffs(constant_mask) - 1;
      if (constant_mask) {
         /* On Bifrost, the blend constant is expressed with a UNORM of the
          * size of the target format. The value is then shifted such that
          * used bits are in the MSB. Here we calculate the factor at pipeline
          * creation time so we only have to do a
          *   hw_constant = float_constant * factor;
          * at descriptor emission time.
          */
         const struct util_format_description *format_desc =
            util_format_description(out->format);
         unsigned chan_size = 0;
         for (unsigned c = 0; c < format_desc->nr_channels; c++)
            chan_size = MAX2(format_desc->channel[c].size, chan_size);
         pipeline->blend.constant[i].bifrost_factor = ((1 << chan_size) - 1)
                                                      << (16 - chan_size);
      }
   }
}
496 
497 static void
panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)498 panvk_pipeline_builder_parse_multisample(struct panvk_pipeline_builder *builder,
499                                          struct panvk_pipeline *pipeline)
500 {
501    unsigned nr_samples = MAX2(
502       builder->create_info.gfx->pMultisampleState->rasterizationSamples, 1);
503 
504    pipeline->ms.rast_samples =
505       builder->create_info.gfx->pMultisampleState->rasterizationSamples;
506    pipeline->ms.sample_mask =
507       builder->create_info.gfx->pMultisampleState->pSampleMask
508          ? builder->create_info.gfx->pMultisampleState->pSampleMask[0]
509          : UINT16_MAX;
510    pipeline->ms.min_samples =
511       MAX2(builder->create_info.gfx->pMultisampleState->minSampleShading *
512               nr_samples,
513            1);
514 }
515 
516 static enum mali_stencil_op
translate_stencil_op(VkStencilOp in)517 translate_stencil_op(VkStencilOp in)
518 {
519    switch (in) {
520    case VK_STENCIL_OP_KEEP:
521       return MALI_STENCIL_OP_KEEP;
522    case VK_STENCIL_OP_ZERO:
523       return MALI_STENCIL_OP_ZERO;
524    case VK_STENCIL_OP_REPLACE:
525       return MALI_STENCIL_OP_REPLACE;
526    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
527       return MALI_STENCIL_OP_INCR_SAT;
528    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
529       return MALI_STENCIL_OP_DECR_SAT;
530    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
531       return MALI_STENCIL_OP_INCR_WRAP;
532    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
533       return MALI_STENCIL_OP_DECR_WRAP;
534    case VK_STENCIL_OP_INVERT:
535       return MALI_STENCIL_OP_INVERT;
536    default:
537       unreachable("Invalid stencil op");
538    }
539 }
540 
541 static void
panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)542 panvk_pipeline_builder_parse_zs(struct panvk_pipeline_builder *builder,
543                                 struct panvk_pipeline *pipeline)
544 {
545    if (!builder->use_depth_stencil_attachment)
546       return;
547 
548    pipeline->zs.z_test =
549       builder->create_info.gfx->pDepthStencilState->depthTestEnable;
550 
551    /* The Vulkan spec says:
552     *
553     *    depthWriteEnable controls whether depth writes are enabled when
554     *    depthTestEnable is VK_TRUE. Depth writes are always disabled when
555     *    depthTestEnable is VK_FALSE.
556     *
557     * The hardware does not make this distinction, though, so we AND in the
558     * condition ourselves.
559     */
560    pipeline->zs.z_write =
561       pipeline->zs.z_test &&
562       builder->create_info.gfx->pDepthStencilState->depthWriteEnable;
563 
564    pipeline->zs.z_compare_func = panvk_per_arch(translate_compare_func)(
565       builder->create_info.gfx->pDepthStencilState->depthCompareOp);
566    pipeline->zs.s_test =
567       builder->create_info.gfx->pDepthStencilState->stencilTestEnable;
568    pipeline->zs.s_front.fail_op = translate_stencil_op(
569       builder->create_info.gfx->pDepthStencilState->front.failOp);
570    pipeline->zs.s_front.pass_op = translate_stencil_op(
571       builder->create_info.gfx->pDepthStencilState->front.passOp);
572    pipeline->zs.s_front.z_fail_op = translate_stencil_op(
573       builder->create_info.gfx->pDepthStencilState->front.depthFailOp);
574    pipeline->zs.s_front.compare_func = panvk_per_arch(translate_compare_func)(
575       builder->create_info.gfx->pDepthStencilState->front.compareOp);
576    pipeline->zs.s_front.compare_mask =
577       builder->create_info.gfx->pDepthStencilState->front.compareMask;
578    pipeline->zs.s_front.write_mask =
579       builder->create_info.gfx->pDepthStencilState->front.writeMask;
580    pipeline->zs.s_front.ref =
581       builder->create_info.gfx->pDepthStencilState->front.reference;
582    pipeline->zs.s_back.fail_op = translate_stencil_op(
583       builder->create_info.gfx->pDepthStencilState->back.failOp);
584    pipeline->zs.s_back.pass_op = translate_stencil_op(
585       builder->create_info.gfx->pDepthStencilState->back.passOp);
586    pipeline->zs.s_back.z_fail_op = translate_stencil_op(
587       builder->create_info.gfx->pDepthStencilState->back.depthFailOp);
588    pipeline->zs.s_back.compare_func = panvk_per_arch(translate_compare_func)(
589       builder->create_info.gfx->pDepthStencilState->back.compareOp);
590    pipeline->zs.s_back.compare_mask =
591       builder->create_info.gfx->pDepthStencilState->back.compareMask;
592    pipeline->zs.s_back.write_mask =
593       builder->create_info.gfx->pDepthStencilState->back.writeMask;
594    pipeline->zs.s_back.ref =
595       builder->create_info.gfx->pDepthStencilState->back.reference;
596 }
597 
598 static void
panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)599 panvk_pipeline_builder_parse_rast(struct panvk_pipeline_builder *builder,
600                                   struct panvk_pipeline *pipeline)
601 {
602    pipeline->rast.clamp_depth =
603       builder->create_info.gfx->pRasterizationState->depthClampEnable;
604    pipeline->rast.depth_bias.enable =
605       builder->create_info.gfx->pRasterizationState->depthBiasEnable;
606    pipeline->rast.depth_bias.constant_factor =
607       builder->create_info.gfx->pRasterizationState->depthBiasConstantFactor;
608    pipeline->rast.depth_bias.clamp =
609       builder->create_info.gfx->pRasterizationState->depthBiasClamp;
610    pipeline->rast.depth_bias.slope_factor =
611       builder->create_info.gfx->pRasterizationState->depthBiasSlopeFactor;
612    pipeline->rast.front_ccw =
613       builder->create_info.gfx->pRasterizationState->frontFace ==
614       VK_FRONT_FACE_COUNTER_CLOCKWISE;
615    pipeline->rast.cull_front_face =
616       builder->create_info.gfx->pRasterizationState->cullMode &
617       VK_CULL_MODE_FRONT_BIT;
618    pipeline->rast.cull_back_face =
619       builder->create_info.gfx->pRasterizationState->cullMode &
620       VK_CULL_MODE_BACK_BIT;
621    pipeline->rast.line_width =
622       builder->create_info.gfx->pRasterizationState->lineWidth;
623    pipeline->rast.enable =
624       !builder->create_info.gfx->pRasterizationState->rasterizerDiscardEnable;
625 }
626 
627 static bool
panvk_fs_required(struct panvk_pipeline * pipeline)628 panvk_fs_required(struct panvk_pipeline *pipeline)
629 {
630    const struct pan_shader_info *info = &pipeline->fs.info;
631 
632    /* If we generally have side effects */
633    if (info->fs.sidefx)
634       return true;
635 
636    /* If colour is written we need to execute */
637    const struct pan_blend_state *blend = &pipeline->blend.state;
638    for (unsigned i = 0; i < blend->rt_count; ++i) {
639       if (blend->rts[i].equation.color_mask)
640          return true;
641    }
642 
643    /* If depth is written and not implied we need to execute.
644     * TODO: Predicate on Z/S writes being enabled */
645    return (info->fs.writes_depth || info->fs.writes_stencil);
646 }
647 
648 #define PANVK_DYNAMIC_FS_RSD_MASK                                              \
649    ((1 << VK_DYNAMIC_STATE_DEPTH_BIAS) |                                       \
650     (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS) |                                  \
651     (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK) |                             \
652     (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK) |                               \
653     (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE))
654 
655 static void
panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)656 panvk_pipeline_builder_init_fs_state(struct panvk_pipeline_builder *builder,
657                                      struct panvk_pipeline *pipeline)
658 {
659    if (!builder->shaders[MESA_SHADER_FRAGMENT])
660       return;
661 
662    pipeline->fs.dynamic_rsd =
663       pipeline->dynamic_state_mask & PANVK_DYNAMIC_FS_RSD_MASK;
664    pipeline->fs.address = pipeline->binary_bo->addr.dev +
665                           builder->stages[MESA_SHADER_FRAGMENT].shader_offset;
666    pipeline->fs.info = builder->shaders[MESA_SHADER_FRAGMENT]->info;
667    pipeline->fs.rt_mask = builder->active_color_attachments;
668    pipeline->fs.required = panvk_fs_required(pipeline);
669 }
670 
/* Record one varying (input or output) of 'stage' into the pipeline-wide
 * varyings table, widening the slot's format when a stage needs more bytes
 * and letting the fragment stage's format win for type information.
 */
static void
panvk_pipeline_update_varying_slot(struct panvk_varyings_info *varyings,
                                   gl_shader_stage stage,
                                   const struct pan_shader_varying *varying,
                                   bool input)
{
   gl_varying_slot loc = varying->location;
   enum panvk_varying_buf_id buf_id = panvk_varying_buf_id(loc);

   /* Always record the slot in this stage's list, even if we bail early
    * below. */
   varyings->stage[stage].loc[varyings->stage[stage].count++] = loc;

   assert(loc < ARRAY_SIZE(varyings->varying));

   enum pipe_format new_fmt = varying->format;
   enum pipe_format old_fmt = varyings->varying[loc].format;

   BITSET_SET(varyings->active, loc);

   /* We expect inputs to either be set by a previous stage or be built
    * in, skip the entry if that's not the case, we'll emit a const
    * varying returning zero for those entries.
    */
   if (input && old_fmt == PIPE_FORMAT_NONE)
      return;

   unsigned new_size = util_format_get_blocksize(new_fmt);
   unsigned old_size = util_format_get_blocksize(old_fmt);

   /* Keep the widest format seen so far for this slot. */
   if (old_size < new_size)
      varyings->varying[loc].format = new_fmt;

   /* Type (float or not) information is only known in the fragment shader, so
    * override for that
    */
   if (input) {
      assert(stage == MESA_SHADER_FRAGMENT && "no geom/tess on Bifrost");
      varyings->varying[loc].format = new_fmt;
   }

   varyings->buf_mask |= 1 << buf_id;
}
712 
713 static void
panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder * builder,struct panvk_pipeline * pipeline)714 panvk_pipeline_builder_collect_varyings(struct panvk_pipeline_builder *builder,
715                                         struct panvk_pipeline *pipeline)
716 {
717    for (uint32_t s = 0; s < MESA_SHADER_STAGES; s++) {
718       if (!builder->shaders[s])
719          continue;
720 
721       const struct pan_shader_info *info = &builder->shaders[s]->info;
722 
723       for (unsigned i = 0; i < info->varyings.input_count; i++) {
724          panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
725                                             &info->varyings.input[i], true);
726       }
727 
728       for (unsigned i = 0; i < info->varyings.output_count; i++) {
729          panvk_pipeline_update_varying_slot(&pipeline->varyings, s,
730                                             &info->varyings.output[i], false);
731       }
732    }
733 
734    /* TODO: Xfb */
735    gl_varying_slot loc;
736    BITSET_FOREACH_SET(loc, pipeline->varyings.active, VARYING_SLOT_MAX) {
737       if (pipeline->varyings.varying[loc].format == PIPE_FORMAT_NONE)
738          continue;
739 
740       enum panvk_varying_buf_id buf_id = panvk_varying_buf_id(loc);
741       unsigned buf_idx = panvk_varying_buf_index(&pipeline->varyings, buf_id);
742       unsigned varying_sz = panvk_varying_size(&pipeline->varyings, loc);
743 
744       pipeline->varyings.varying[loc].buf = buf_idx;
745       pipeline->varyings.varying[loc].offset =
746          pipeline->varyings.buf[buf_idx].stride;
747       pipeline->varyings.buf[buf_idx].stride += varying_sz;
748    }
749 }
750 
/* Translate the Vulkan vertex-input state into the HW-oriented attribute
 * layout stored on the pipeline: per-binding buffer descriptions and
 * per-attribute (buffer, format, offset) triplets, plus the "special"
 * buffers backing vertex/instance IDs when the shader uses them.
 */
static void
panvk_pipeline_builder_parse_vertex_input(
   struct panvk_pipeline_builder *builder, struct panvk_pipeline *pipeline)
{
   struct panvk_attribs_info *attribs = &pipeline->attribs;
   const VkPipelineVertexInputStateCreateInfo *info =
      builder->create_info.gfx->pVertexInputState;

   /* Optional per-binding divisors from VK_EXT_vertex_attribute_divisor. */
   const VkPipelineVertexInputDivisorStateCreateInfoEXT *div_info =
      vk_find_struct_const(info->pNext,
                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);

   /* Vertex buffer bindings. Bindings may be sparse, hence the MAX2 on
    * buf_count rather than a plain increment.
    */
   for (unsigned i = 0; i < info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &info->pVertexBindingDescriptions[i];
      attribs->buf_count = MAX2(desc->binding + 1, attribs->buf_count);
      attribs->buf[desc->binding].stride = desc->stride;
      attribs->buf[desc->binding].per_instance =
         desc->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE;
      /* Default divisor; overridden below if the divisor extension struct
       * names this binding.
       */
      attribs->buf[desc->binding].instance_divisor = 1;
      attribs->buf[desc->binding].special = false;
   }

   if (div_info) {
      for (unsigned i = 0; i < div_info->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *div =
            &div_info->pVertexBindingDivisors[i];
         attribs->buf[div->binding].instance_divisor = div->divisor;
      }
   }

   const struct pan_shader_info *vs =
      &builder->shaders[MESA_SHADER_VERTEX]->info;

   for (unsigned i = 0; i < info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &info->pVertexAttributeDescriptions[i];

      /* Attribute slots are packed in location order over the locations
       * the VS actually reads: the slot index is the number of read
       * locations below this one.
       */
      unsigned attrib = desc->location + VERT_ATTRIB_GENERIC0;
      unsigned slot =
         util_bitcount64(vs->attributes_read & BITFIELD64_MASK(attrib));

      attribs->attrib[slot].buf = desc->binding;
      attribs->attrib[slot].format = vk_format_to_pipe_format(desc->format);
      attribs->attrib[slot].offset = desc->offset;
   }

   /* PAN_VERTEX_ID/PAN_INSTANCE_ID are fixed attribute slots; when the
    * compiled VS uses enough attribute slots to reach them, back each one
    * with a dedicated "special" buffer appended after the user buffers.
    * NOTE(review): presumably slots at/above these indices are reserved by
    * the compiler for the ID sysvals — confirm against pan_shader_info.
    */
   if (vs->attribute_count >= PAN_VERTEX_ID) {
      attribs->buf[attribs->buf_count].special = true;
      attribs->buf[attribs->buf_count].special_id = PAN_VERTEX_ID;
      attribs->attrib[PAN_VERTEX_ID].buf = attribs->buf_count++;
      attribs->attrib[PAN_VERTEX_ID].format = PIPE_FORMAT_R32_UINT;
   }

   if (vs->attribute_count >= PAN_INSTANCE_ID) {
      attribs->buf[attribs->buf_count].special = true;
      attribs->buf[attribs->buf_count].special_id = PAN_INSTANCE_ID;
      attribs->attrib[PAN_INSTANCE_ID].buf = attribs->buf_count++;
      attribs->attrib[PAN_INSTANCE_ID].format = PIPE_FORMAT_R32_UINT;
   }

   attribs->attrib_count = MAX2(attribs->attrib_count, vs->attribute_count);
}
814 
815 static VkResult
panvk_pipeline_builder_build(struct panvk_pipeline_builder * builder,struct panvk_pipeline ** pipeline)816 panvk_pipeline_builder_build(struct panvk_pipeline_builder *builder,
817                              struct panvk_pipeline **pipeline)
818 {
819    VkResult result = panvk_pipeline_builder_create_pipeline(builder, pipeline);
820    if (result != VK_SUCCESS)
821       return result;
822 
823    /* TODO: make those functions return a result and handle errors */
824    if (builder->create_info.gfx) {
825       panvk_pipeline_builder_parse_dynamic(builder, *pipeline);
826       panvk_pipeline_builder_parse_color_blend(builder, *pipeline);
827       panvk_pipeline_builder_compile_shaders(builder, *pipeline);
828       panvk_pipeline_builder_collect_varyings(builder, *pipeline);
829       panvk_pipeline_builder_parse_input_assembly(builder, *pipeline);
830       panvk_pipeline_builder_parse_multisample(builder, *pipeline);
831       panvk_pipeline_builder_parse_zs(builder, *pipeline);
832       panvk_pipeline_builder_parse_rast(builder, *pipeline);
833       panvk_pipeline_builder_parse_vertex_input(builder, *pipeline);
834       panvk_pipeline_builder_upload_shaders(builder, *pipeline);
835       panvk_pipeline_builder_init_fs_state(builder, *pipeline);
836       panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
837       panvk_pipeline_builder_init_shaders(builder, *pipeline);
838       panvk_pipeline_builder_parse_viewport(builder, *pipeline);
839    } else {
840       panvk_pipeline_builder_compile_shaders(builder, *pipeline);
841       panvk_pipeline_builder_upload_shaders(builder, *pipeline);
842       panvk_pipeline_builder_alloc_static_state_bo(builder, *pipeline);
843       panvk_pipeline_builder_init_shaders(builder, *pipeline);
844    }
845 
846    return VK_SUCCESS;
847 }
848 
849 static void
panvk_pipeline_builder_init_graphics(struct panvk_pipeline_builder * builder,struct panvk_device * dev,struct panvk_pipeline_cache * cache,const VkGraphicsPipelineCreateInfo * create_info,const VkAllocationCallbacks * alloc)850 panvk_pipeline_builder_init_graphics(
851    struct panvk_pipeline_builder *builder, struct panvk_device *dev,
852    struct panvk_pipeline_cache *cache,
853    const VkGraphicsPipelineCreateInfo *create_info,
854    const VkAllocationCallbacks *alloc)
855 {
856    VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
857    assert(layout);
858    *builder = (struct panvk_pipeline_builder){
859       .device = dev,
860       .cache = cache,
861       .layout = layout,
862       .create_info.gfx = create_info,
863       .alloc = alloc,
864    };
865 
866    builder->rasterizer_discard =
867       create_info->pRasterizationState->rasterizerDiscardEnable;
868 
869    if (builder->rasterizer_discard) {
870       builder->samples = VK_SAMPLE_COUNT_1_BIT;
871    } else {
872       builder->samples = create_info->pMultisampleState->rasterizationSamples;
873 
874       const struct panvk_render_pass *pass =
875          panvk_render_pass_from_handle(create_info->renderPass);
876       const struct panvk_subpass *subpass =
877          &pass->subpasses[create_info->subpass];
878 
879       builder->use_depth_stencil_attachment =
880          subpass->zs_attachment.idx != VK_ATTACHMENT_UNUSED;
881 
882       assert(subpass->color_count <=
883              create_info->pColorBlendState->attachmentCount);
884       builder->active_color_attachments = 0;
885       for (uint32_t i = 0; i < subpass->color_count; i++) {
886          uint32_t idx = subpass->color_attachments[i].idx;
887          if (idx == VK_ATTACHMENT_UNUSED)
888             continue;
889 
890          builder->active_color_attachments |= 1 << i;
891          builder->color_attachment_formats[i] = pass->attachments[idx].format;
892       }
893    }
894 }
895 
896 VkResult
panvk_per_arch(CreateGraphicsPipelines)897 panvk_per_arch(CreateGraphicsPipelines)(
898    VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
899    const VkGraphicsPipelineCreateInfo *pCreateInfos,
900    const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
901 {
902    VK_FROM_HANDLE(panvk_device, dev, device);
903    VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
904 
905    for (uint32_t i = 0; i < count; i++) {
906       struct panvk_pipeline_builder builder;
907       panvk_pipeline_builder_init_graphics(&builder, dev, cache,
908                                            &pCreateInfos[i], pAllocator);
909 
910       struct panvk_pipeline *pipeline;
911       VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
912       panvk_pipeline_builder_finish(&builder);
913 
914       if (result != VK_SUCCESS) {
915          for (uint32_t j = 0; j < i; j++) {
916             panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
917             pPipelines[j] = VK_NULL_HANDLE;
918          }
919 
920          return result;
921       }
922 
923       pPipelines[i] = panvk_pipeline_to_handle(pipeline);
924    }
925 
926    return VK_SUCCESS;
927 }
928 
929 static void
panvk_pipeline_builder_init_compute(struct panvk_pipeline_builder * builder,struct panvk_device * dev,struct panvk_pipeline_cache * cache,const VkComputePipelineCreateInfo * create_info,const VkAllocationCallbacks * alloc)930 panvk_pipeline_builder_init_compute(
931    struct panvk_pipeline_builder *builder, struct panvk_device *dev,
932    struct panvk_pipeline_cache *cache,
933    const VkComputePipelineCreateInfo *create_info,
934    const VkAllocationCallbacks *alloc)
935 {
936    VK_FROM_HANDLE(panvk_pipeline_layout, layout, create_info->layout);
937    assert(layout);
938    *builder = (struct panvk_pipeline_builder){
939       .device = dev,
940       .cache = cache,
941       .layout = layout,
942       .create_info.compute = create_info,
943       .alloc = alloc,
944    };
945 }
946 
947 VkResult
panvk_per_arch(CreateComputePipelines)948 panvk_per_arch(CreateComputePipelines)(
949    VkDevice device, VkPipelineCache pipelineCache, uint32_t count,
950    const VkComputePipelineCreateInfo *pCreateInfos,
951    const VkAllocationCallbacks *pAllocator, VkPipeline *pPipelines)
952 {
953    VK_FROM_HANDLE(panvk_device, dev, device);
954    VK_FROM_HANDLE(panvk_pipeline_cache, cache, pipelineCache);
955 
956    for (uint32_t i = 0; i < count; i++) {
957       struct panvk_pipeline_builder builder;
958       panvk_pipeline_builder_init_compute(&builder, dev, cache,
959                                           &pCreateInfos[i], pAllocator);
960 
961       struct panvk_pipeline *pipeline;
962       VkResult result = panvk_pipeline_builder_build(&builder, &pipeline);
963       panvk_pipeline_builder_finish(&builder);
964 
965       if (result != VK_SUCCESS) {
966          for (uint32_t j = 0; j < i; j++) {
967             panvk_DestroyPipeline(device, pPipelines[j], pAllocator);
968             pPipelines[j] = VK_NULL_HANDLE;
969          }
970 
971          return result;
972       }
973 
974       pPipelines[i] = panvk_pipeline_to_handle(pipeline);
975    }
976 
977    return VK_SUCCESS;
978 }
979