/*
 * Copyright © 2021 Raspberry Pi Ltd
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

static uint8_t
blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
{
   switch (factor) {
   case VK_BLEND_FACTOR_ZERO:
   case VK_BLEND_FACTOR_ONE:
   case VK_BLEND_FACTOR_SRC_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
   case VK_BLEND_FACTOR_DST_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
   case VK_BLEND_FACTOR_SRC_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
   case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
      return factor;
   case VK_BLEND_FACTOR_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
   case VK_BLEND_FACTOR_CONSTANT_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
      *needs_constants = true;
      return factor;
   case VK_BLEND_FACTOR_DST_ALPHA:
      return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
                             V3D_BLEND_FACTOR_DST_ALPHA;
   case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
      return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
                             V3D_BLEND_FACTOR_INV_DST_ALPHA;
   case VK_BLEND_FACTOR_SRC1_COLOR:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
   case VK_BLEND_FACTOR_SRC1_ALPHA:
   case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
      unreachable("Invalid blend factor: dual source blending not supported.");
   default:
      unreachable("Unknown blend factor.");
   }
}

static void
pack_blend(struct v3dv_pipeline *pipeline,
           const VkPipelineColorBlendStateCreateInfo *cb_info)
{
   /* By default, we are not enabling blending and all color channel writes are
    * enabled. Color write enables are independent of whether blending is
    * enabled or not.
    *
    * Vulkan specifies color write masks so that bits set correspond to
    * enabled channels. Our hardware does it the other way around.
    */
   pipeline->blend.enables = 0;
   pipeline->blend.color_write_masks = 0; /* All channels enabled */

   if (!cb_info)
      return;

   assert(pipeline->subpass);
   if (pipeline->subpass->color_count == 0)
      return;

   assert(pipeline->subpass->color_count == cb_info->attachmentCount);
   pipeline->blend.needs_color_constants = false;
   uint32_t color_write_masks = 0;
   for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
      const VkPipelineColorBlendAttachmentState *b_state =
         &cb_info->pAttachments[i];

      uint32_t attachment_idx =
         pipeline->subpass->color_attachments[i].attachment;
      if (attachment_idx == VK_ATTACHMENT_UNUSED)
         continue;

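      /* Accumulate the inverted ("disable") write mask for this render
       * target; the hardware uses 4 disable bits per render target.
       */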
      color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);

      if (!b_state->blendEnable)
         continue;

      VkAttachmentDescription2 *desc =
         &pipeline->pass->attachments[attachment_idx].desc;
      const struct v3dv_format *format = v3dX(get_format)(desc->format);

      /* We only do blending with render pass attachments, so we should not
       * have multiplanar images here.
       */
      assert(format->plane_count == 1);
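      /* If the format has no alpha channel, destination alpha reads back as
       * 1.0, which lets blend_factor() fold DST_ALPHA factors into ONE/ZERO.
       */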
      bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1);

      uint8_t rt_mask = 1 << i;
      pipeline->blend.enables |= rt_mask;

      v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
         config.render_target_mask = rt_mask;

         config.color_blend_mode = b_state->colorBlendOp;
         config.color_blend_dst_factor =
            blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
         config.color_blend_src_factor =
            blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);

         config.alpha_blend_mode = b_state->alphaBlendOp;
         config.alpha_blend_dst_factor =
            blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
         config.alpha_blend_src_factor =
            blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
                         &pipeline->blend.needs_color_constants);
      }
   }

   pipeline->blend.color_write_masks = color_write_masks;
}

/* This requires that pack_blend() has been called before so we can set
 * the overall blend enable bit in the CFG_BITS packet.
 */
static void
pack_cfg_bits(struct v3dv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *ds_info,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
              const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));

   pipeline->msaa =
      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
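      /* The hardware exposes independent rasterization enables per facing
       * direction, so each Vulkan cull bit maps to disabling the matching
       * facing direction.
       */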
      config.enable_forward_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;

      config.enable_reverse_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;

      /* Seems like the hardware is backwards regarding this setting... */
      config.clockwise_primitives =
         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;

      /* Even if rs_info->depthBiasEnable is true, we may decide not to
       * enable depth bias, for example if the pipeline doesn't have a
       * depth/stencil attachment.
       */
      config.enable_depth_offset = pipeline->depth_bias.enabled;

      /* This is required to pass line rasterization tests in the CTS while
       * exposing, at least, a minimum of 4 bits of subpixel precision
       * (the minimum requirement).
       */
      if (ls_info &&
          ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
         config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
      else
         config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;

      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
         config.direct3d_wireframe_triangles_mode = true;
         config.direct3d_point_fill_mode =
            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
      }

      /* Diamond-exit rasterization does not support oversampling */
      config.rasterizer_oversample_mode =
         (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
          pipeline->msaa) ? 1 : 0;

      /* From the Vulkan spec:
       *
       *   "Provoking Vertex:
       *
       *       The vertex in a primitive from which flat shaded attribute
       *       values are taken. This is generally the “first” vertex in the
       *       primitive, and depends on the primitive topology."
       *
       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
       * the last vertex by default.
       */
      if (pv_info) {
         config.direct3d_provoking_vertex =
            pv_info->provokingVertexMode ==
               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
      } else {
         config.direct3d_provoking_vertex = true;
      }

      config.blend_enable = pipeline->blend.enables != 0;

      /* Disable depth/stencil if we don't have a D/S attachment */
      bool has_ds_attachment =
         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;

      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
         config.z_updates_enable = ds_info->depthWriteEnable;
         config.depth_test_function = ds_info->depthCompareOp;
      } else {
         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
      }

      config.stencil_enable =
         ds_info ? ds_info->stencilTestEnable && has_ds_attachment : false;

      pipeline->z_updates_enable = config.z_updates_enable;

#if V3D_VERSION >= 71
      /* From the Vulkan spec:
       *
       *    "depthClampEnable controls whether to clamp the fragment’s depth
       *     values as described in Depth Test. If the pipeline is not created
       *     with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
       *     then enabling depth clamp will also disable clipping primitives to
       *     the z planes of the frustum as described in Primitive Clipping.
       *     Otherwise depth clipping is controlled by the state set in
       *     VkPipelineRasterizationDepthClipStateCreateInfoEXT."
       */
      bool z_clamp_enable = rs_info && rs_info->depthClampEnable;
      bool z_clip_enable = false;
      const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
         rs_info ? vk_find_struct_const(rs_info->pNext,
                                        PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
                   NULL;
      if (clip_info)
         z_clip_enable = clip_info->depthClipEnable;
      else if (!z_clamp_enable)
         z_clip_enable = true;

      if (z_clip_enable) {
         config.z_clipping_mode = pipeline->negative_one_to_one ?
            V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
      } else {
         config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
      }

      config.z_clamp_mode = z_clamp_enable;

      config.depth_bounds_test_enable =
         ds_info && ds_info->depthBoundsTestEnable && has_ds_attachment;
#endif
   };
}

static uint32_t
translate_stencil_op(VkStencilOp op)
{
   switch (op) {
   case VK_STENCIL_OP_KEEP:
      return V3D_STENCIL_OP_KEEP;
   case VK_STENCIL_OP_ZERO:
      return V3D_STENCIL_OP_ZERO;
   case VK_STENCIL_OP_REPLACE:
      return V3D_STENCIL_OP_REPLACE;
   case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
      return V3D_STENCIL_OP_INCR;
   case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
      return V3D_STENCIL_OP_DECR;
   case VK_STENCIL_OP_INVERT:
      return V3D_STENCIL_OP_INVERT;
   case VK_STENCIL_OP_INCREMENT_AND_WRAP:
      return V3D_STENCIL_OP_INCWRAP;
   case VK_STENCIL_OP_DECREMENT_AND_WRAP:
      return V3D_STENCIL_OP_DECWRAP;
   default:
      unreachable("bad stencil op");
   }
}

static void
pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
                        uint8_t *stencil_cfg,
                        bool is_front,
                        bool is_back,
                        const VkStencilOpState *stencil_state)
{
   /* From the Vulkan spec:
    *
    *   "Reference is an integer reference value that is used in the unsigned
    *    stencil comparison. The reference value used by stencil comparison
    *    must be within the range [0,2^s-1], where s is the number of bits in
    *    the stencil framebuffer attachment, otherwise the reference value is
    *    considered undefined."
    *
    * In our case, 's' is always 8, so we clamp to that to prevent our packing
    * functions from asserting in debug mode if they see larger values.
    *
    * If we have dynamic state we need to make sure we set the corresponding
    * state bits to 0, since cl_emit_with_prepacked ORs the new value with
    * the old.
    */
   const uint8_t write_mask =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
         0 : stencil_state->writeMask & 0xff;

   const uint8_t compare_mask =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
         0 : stencil_state->compareMask & 0xff;

   const uint8_t reference =
      pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_REFERENCE ?
         0 : stencil_state->reference & 0xff;

   v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
      config.front_config = is_front;
      config.back_config = is_back;
      config.stencil_write_mask = write_mask;
      config.stencil_test_mask = compare_mask;
      config.stencil_test_function = stencil_state->compareOp;
      config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
      config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
      config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
      config.stencil_ref_value = reference;
   }
}

static void
pack_stencil_cfg(struct v3dv_pipeline *pipeline,
                 const VkPipelineDepthStencilStateCreateInfo *ds_info)
{
   assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));

   if (!ds_info || !ds_info->stencilTestEnable)
      return;

   if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
      return;

   const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
                                           V3DV_DYNAMIC_STENCIL_WRITE_MASK |
                                           V3DV_DYNAMIC_STENCIL_REFERENCE;

   /* If front != back or we have dynamic stencil state we can't emit a single
    * packet for both faces.
    */
   bool needs_front_and_back = false;
   if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
       memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
      needs_front_and_back = true;

   /* If the front and back configurations are the same we can emit both with
    * a single packet.
    */
   pipeline->emit_stencil_cfg[0] = true;
   if (!needs_front_and_back) {
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
                              true, true, &ds_info->front);
   } else {
      pipeline->emit_stencil_cfg[1] = true;
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
                              true, false, &ds_info->front);
      pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
                              false, true, &ds_info->back);
   }
}

void
v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
                          const VkPipelineColorBlendStateCreateInfo *cb_info,
                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
                          const VkPipelineRasterizationStateCreateInfo *rs_info,
                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
                          const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
                          const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   pack_blend(pipeline, cb_info);
   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
   pack_stencil_cfg(pipeline, ds_info);
}

static void
pack_shader_state_record(struct v3dv_pipeline *pipeline)
{
   assert(sizeof(pipeline->shader_state_record) >=
          cl_packet_length(GL_SHADER_STATE_RECORD));

   struct v3d_fs_prog_data *prog_data_fs =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;

   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   struct v3d_vs_prog_data *prog_data_vs_bin =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;

   /* Note: we are not packing addresses, as we need the job (see
    * cl_pack_emit_reloc). Additionally, uniforms can't be filled in at this
    * point as they depend on dynamic info that can be set after the pipeline
    * is created (like the viewport), so they would need to be filled in
    * later. We are therefore doing a partial prepacking.
    */
   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
      shader.enable_clipping = true;

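      /* Without a geometry shader, point size only comes from the shaded
       * vertex data when drawing points; with a geometry shader it depends
       * on whether the GS actually writes gl_PointSize.
       */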
      if (!pipeline->has_gs) {
         shader.point_size_in_shaded_vertex_data =
            pipeline->topology == MESA_PRIM_POINTS;
      } else {
         struct v3d_gs_prog_data *prog_data_gs =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
         shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
      }

      /* Must be set if the shader modifies Z, discards, or modifies
       * the sample mask. For any of these cases, the fragment
       * shader needs to write the Z value (even if it just discards).
       */
      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;

      /* Set if the EZ test must be disabled (due to shader side
       * effects and the early_z flag not being present in the
       * shader).
       */
      shader.turn_off_early_z_test = prog_data_fs->disable_ez;

      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
         prog_data_fs->uses_center_w;

      /* The description for gl_SampleID states that if a fragment shader reads
       * it, then we should automatically activate per-sample shading. However,
       * the Vulkan spec also states that if a framebuffer has no attachments:
       *
       *    "The subpass continues to use the width, height, and layers of the
       *     framebuffer to define the dimensions of the rendering area, and the
       *     rasterizationSamples from each pipeline’s
       *     VkPipelineMultisampleStateCreateInfo to define the number of
       *     samples used in rasterization."
       *
       * So in this scenario, if the pipeline doesn't enable multiple samples
       * but the fragment shader accesses gl_SampleID, we would be requested
       * to do per-sample shading in single-sample rasterization mode, which
       * is pointless, so just disable it in that case.
       */
      shader.enable_sample_rate_shading =
         pipeline->sample_rate_shading ||
         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);

      shader.any_shader_reads_hardware_written_primitive_id = false;

      shader.do_scoreboard_wait_on_first_thread_switch =
         prog_data_fs->lock_scoreboard_on_first_thrsw;
      shader.disable_implicit_point_line_varyings =
         !prog_data_fs->uses_implicit_point_line_varyings;

      shader.number_of_varyings_in_fragment_shader =
         prog_data_fs->num_inputs;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_code_address */
      /* shader.vertex_shader_code_address */
      /* shader.fragment_shader_code_address */

#if V3D_VERSION == 42
      shader.coordinate_shader_propagate_nans = true;
      shader.vertex_shader_propagate_nans = true;
      shader.fragment_shader_propagate_nans = true;

      /* FIXME: Use combined input/output size flag in the common case (also
       * on v3d, see v3dx_draw).
       */
      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs_bin->separate_segments;
      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs->separate_segments;
      shader.coordinate_shader_input_vpm_segment_size =
         prog_data_vs_bin->separate_segments ?
         prog_data_vs_bin->vpm_input_size : 1;
      shader.vertex_shader_input_vpm_segment_size =
         prog_data_vs->separate_segments ?
         prog_data_vs->vpm_input_size : 1;
#endif

      /* On V3D 7.1 there isn't a specific flag to select shared or separate
       * input/output VPM segments. We just set vpm_input_size to 0 and set
       * the output size to the maximum needed. That should already be
       * properly set in prog_data_vs_bin.
       */
#if V3D_VERSION == 71
      shader.coordinate_shader_input_vpm_segment_size =
         prog_data_vs_bin->vpm_input_size;
      shader.vertex_shader_input_vpm_segment_size =
         prog_data_vs->vpm_input_size;
#endif

      shader.coordinate_shader_output_vpm_segment_size =
         prog_data_vs_bin->vpm_output_size;
      shader.vertex_shader_output_vpm_segment_size =
         prog_data_vs->vpm_output_size;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_uniforms_address */
      /* shader.vertex_shader_uniforms_address */
      /* shader.fragment_shader_uniforms_address */

      shader.min_coord_shader_input_segments_required_in_play =
         pipeline->vpm_cfg_bin.As;
      shader.min_vertex_shader_input_segments_required_in_play =
         pipeline->vpm_cfg.As;

      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg_bin.Ve;
      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg.Ve;

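      /* The compiler reports how many threads each shader variant was
       * compiled for; flag the 4-way threaded variants for the hardware.
       */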
      shader.coordinate_shader_4_way_threadable =
         prog_data_vs_bin->base.threads == 4;
      shader.vertex_shader_4_way_threadable =
         prog_data_vs->base.threads == 4;
      shader.fragment_shader_4_way_threadable =
         prog_data_fs->base.threads == 4;

      shader.coordinate_shader_start_in_final_thread_section =
         prog_data_vs_bin->base.single_seg;
      shader.vertex_shader_start_in_final_thread_section =
         prog_data_vs->base.single_seg;
      shader.fragment_shader_start_in_final_thread_section =
         prog_data_fs->base.single_seg;

      shader.vertex_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_vid;
      shader.base_instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_biid;
      shader.instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_iid;
      shader.vertex_id_read_by_vertex_shader =
         prog_data_vs->uses_vid;
      shader.base_instance_id_read_by_vertex_shader =
         prog_data_vs->uses_biid;
      shader.instance_id_read_by_vertex_shader =
         prog_data_vs->uses_iid;

      /* Note: see previous note about addresses */
      /* shader.address_of_default_attribute_values */
   }
}

static void
pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
{
   assert(sizeof(pipeline->vcm_cache_size) ==
          cl_packet_length(VCM_CACHE_SIZE));

   v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
      vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
      vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
   }
}

/* As defined in the GL_SHADER_STATE_ATTRIBUTE_RECORD packet */
static uint8_t
get_attr_type(const struct util_format_description *desc)
{
   uint32_t r_size = desc->channel[0].size;
   uint8_t attr_type = ATTRIBUTE_FLOAT;

   switch (desc->channel[0].type) {
   case UTIL_FORMAT_TYPE_FLOAT:
      if (r_size == 32) {
         attr_type = ATTRIBUTE_FLOAT;
      } else {
         assert(r_size == 16);
         attr_type = ATTRIBUTE_HALF_FLOAT;
      }
      break;

   case UTIL_FORMAT_TYPE_SIGNED:
   case UTIL_FORMAT_TYPE_UNSIGNED:
      switch (r_size) {
      case 32:
         attr_type = ATTRIBUTE_INT;
         break;
      case 16:
         attr_type = ATTRIBUTE_SHORT;
         break;
      case 10:
         attr_type = ATTRIBUTE_INT2_10_10_10;
         break;
      case 8:
         attr_type = ATTRIBUTE_BYTE;
         break;
      default:
         fprintf(stderr,
                 "format %s unsupported\n",
                 desc->name);
         attr_type = ATTRIBUTE_BYTE;
         abort();
      }
      break;

   default:
      fprintf(stderr,
              "format %s unsupported\n",
              desc->name);
      abort();
   }

   return attr_type;
}

static void
pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
                                   uint32_t index,
                                   const VkVertexInputAttributeDescription *vi_desc)
{
   const uint32_t packet_length =
      cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);

   const struct util_format_description *desc =
      vk_format_description(vi_desc->format);

   uint32_t binding = vi_desc->binding;

   v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
             GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {

      /* vec_size == 0 means 4 */
      attr.vec_size = desc->nr_channels & 3;
      attr.signed_int_type = (desc->channel[0].type ==
                              UTIL_FORMAT_TYPE_SIGNED);
      attr.normalized_int_type = desc->channel[0].normalized;
      attr.read_as_int_uint = desc->channel[0].pure_integer;

      attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
                                   0xffff);
      attr.stride = pipeline->vb[binding].stride;
      attr.type = get_attr_type(desc);
   }
}

void
v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
                                  const VkPipelineVertexInputStateCreateInfo *vi_info,
                                  const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
{
   pack_shader_state_record(pipeline);
   pack_vcm_cache_size(pipeline);

   pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->vb[desc->binding].stride = desc->stride;
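      /* VK_VERTEX_INPUT_RATE_VERTEX is 0 and VK_VERTEX_INPUT_RATE_INSTANCE is
       * 1, so the input rate doubles as the initial instance divisor; any
       * explicit divisors from vd_info override it below.
       */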
      pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
   }

   if (vd_info) {
      for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *desc =
            &vd_info->pVertexBindingDivisors[i];

         pipeline->vb[desc->binding].instance_divisor = desc->divisor;
      }
   }

   pipeline->va_count = 0;
   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];
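      /* Vulkan attribute locations are offset to the generic vertex attribute
       * slots the compiler uses for its driver_location_map.
       */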
      uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;

      /* We use a custom driver_location_map instead of
       * nir_find_variable_with_location because if we were able to get the
       * shader variant from the cache, we would not have the nir shader
       * available.
       */
      uint32_t driver_location =
         prog_data_vs->driver_location_map[location];

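      /* Attributes that the vertex shader doesn't read are not assigned a
       * driver location and are skipped.
       */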
      if (driver_location != -1) {
         assert(driver_location < MAX_VERTEX_ATTRIBS);
         pipeline->va[driver_location].offset = desc->offset;
         pipeline->va[driver_location].binding = desc->binding;
         pipeline->va[driver_location].vk_format = desc->format;

         pack_shader_state_attribute_record(pipeline, driver_location, desc);

         pipeline->va_count++;
      }
   }
}

#if V3D_VERSION == 42
static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
{
   for (uint8_t i = 0; i < pipeline->va_count; i++) {
      if (vk_format_is_int(pipeline->va[i].vk_format))
         return true;
   }
   return false;
}
#endif

bool
v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
{
#if V3D_VERSION == 42
   return pipeline_has_integer_vertex_attrib(pipeline);
#endif

   return false;
}

/* @pipeline can be NULL. In that case we assume the most common case: for
 * v42, that all the attributes have a float format (we only create an
 * all-float BO once and we reuse it with all float pipelines). Otherwise we
 * look at the actual type of each attribute used with the specific pipeline
 * passed in.
 */
struct v3dv_bo *
v3dX(create_default_attribute_values)(struct v3dv_device *device,
                                      struct v3dv_pipeline *pipeline)
{
#if V3D_VERSION >= 71
   return NULL;
#endif

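   /* One vec4 worth of default values per vertex attribute. */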
   uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
   struct v3dv_bo *bo;

   bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);

   if (!bo) {
      fprintf(stderr, "failed to allocate memory for the default "
              "attribute values\n");
      return NULL;
   }

   bool ok = v3dv_bo_map(device, bo, size);
   if (!ok) {
      fprintf(stderr, "failed to map default attribute values buffer\n");
      return NULL;
   }

   uint32_t *attrs = bo->map;
   uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
   for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
      attrs[i * 4 + 0] = 0;
      attrs[i * 4 + 1] = 0;
      attrs[i * 4 + 2] = 0;
      VkFormat attr_format =
         pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
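      /* Integer attributes default to an integer 1 in the w component,
       * everything else defaults to 1.0f.
       */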
      if (i < va_count && vk_format_is_int(attr_format)) {
         attrs[i * 4 + 3] = 1;
      } else {
         attrs[i * 4 + 3] = fui(1.0);
      }
   }

   v3dv_bo_unmap(device, bo);

   return bo;
}