• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Raspberry Pi Ltd
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include "v3dv_private.h"
25 #include "broadcom/common/v3d_macros.h"
26 #include "broadcom/cle/v3dx_pack.h"
27 #include "broadcom/compiler/v3d_compiler.h"
28 
29 static uint8_t
blend_factor(VkBlendFactor factor,bool dst_alpha_one,bool * needs_constants)30 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
31 {
32    switch (factor) {
33    case VK_BLEND_FACTOR_ZERO:
34    case VK_BLEND_FACTOR_ONE:
35    case VK_BLEND_FACTOR_SRC_COLOR:
36    case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
37    case VK_BLEND_FACTOR_DST_COLOR:
38    case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
39    case VK_BLEND_FACTOR_SRC_ALPHA:
40    case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
41    case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
42       return factor;
43    case VK_BLEND_FACTOR_CONSTANT_COLOR:
44    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
45    case VK_BLEND_FACTOR_CONSTANT_ALPHA:
46    case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
47       *needs_constants = true;
48       return factor;
49    case VK_BLEND_FACTOR_DST_ALPHA:
50       return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
51                              V3D_BLEND_FACTOR_DST_ALPHA;
52    case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
53       return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
54                              V3D_BLEND_FACTOR_INV_DST_ALPHA;
55    case VK_BLEND_FACTOR_SRC1_COLOR:
56    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
57    case VK_BLEND_FACTOR_SRC1_ALPHA:
58    case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
59       assert(!"Invalid blend factor: dual source blending not supported.");
60    default:
61       assert(!"Unknown blend factor.");
62    }
63 
64    /* Should be handled by the switch, added to avoid a "end of non-void
65     * function" error
66     */
67    unreachable("Unknown blend factor.");
68 }
69 
70 static void
pack_blend(struct v3dv_pipeline * pipeline,const VkPipelineColorBlendStateCreateInfo * cb_info)71 pack_blend(struct v3dv_pipeline *pipeline,
72            const VkPipelineColorBlendStateCreateInfo *cb_info)
73 {
74    /* By default, we are not enabling blending and all color channel writes are
75     * enabled. Color write enables are independent of whether blending is
76     * enabled or not.
77     *
78     * Vulkan specifies color write masks so that bits set correspond to
79     * enabled channels. Our hardware does it the other way around.
80     */
81    pipeline->blend.enables = 0;
82    pipeline->blend.color_write_masks = 0; /* All channels enabled */
83 
84    if (!cb_info)
85       return;
86 
87    assert(pipeline->subpass);
88    if (pipeline->subpass->color_count == 0)
89       return;
90 
91    assert(pipeline->subpass->color_count == cb_info->attachmentCount);
92 
93    pipeline->blend.needs_color_constants = false;
94    uint32_t color_write_masks = 0;
95    for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
96       const VkPipelineColorBlendAttachmentState *b_state =
97          &cb_info->pAttachments[i];
98 
99       uint32_t attachment_idx =
100          pipeline->subpass->color_attachments[i].attachment;
101       if (attachment_idx == VK_ATTACHMENT_UNUSED)
102          continue;
103 
104       color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
105 
106       if (!b_state->blendEnable)
107          continue;
108 
109       VkAttachmentDescription2 *desc =
110          &pipeline->pass->attachments[attachment_idx].desc;
111       const struct v3dv_format *format = v3dX(get_format)(desc->format);
112       bool dst_alpha_one = (format->swizzle[3] == PIPE_SWIZZLE_1);
113 
114       uint8_t rt_mask = 1 << i;
115       pipeline->blend.enables |= rt_mask;
116 
117       v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
118          config.render_target_mask = rt_mask;
119 
120          config.color_blend_mode = b_state->colorBlendOp;
121          config.color_blend_dst_factor =
122             blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
123                          &pipeline->blend.needs_color_constants);
124          config.color_blend_src_factor =
125             blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
126                          &pipeline->blend.needs_color_constants);
127 
128          config.alpha_blend_mode = b_state->alphaBlendOp;
129          config.alpha_blend_dst_factor =
130             blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
131                          &pipeline->blend.needs_color_constants);
132          config.alpha_blend_src_factor =
133             blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
134                          &pipeline->blend.needs_color_constants);
135       }
136    }
137 
138    pipeline->blend.color_write_masks = color_write_masks;
139 }
140 
141 /* This requires that pack_blend() had been called before so we can set
142  * the overall blend enable bit in the CFG_BITS packet.
143  */
144 static void
pack_cfg_bits(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info,const VkPipelineRasterizationStateCreateInfo * rs_info,const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT * pv_info,const VkPipelineRasterizationLineStateCreateInfoEXT * ls_info,const VkPipelineMultisampleStateCreateInfo * ms_info)145 pack_cfg_bits(struct v3dv_pipeline *pipeline,
146               const VkPipelineDepthStencilStateCreateInfo *ds_info,
147               const VkPipelineRasterizationStateCreateInfo *rs_info,
148               const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
149               const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
150               const VkPipelineMultisampleStateCreateInfo *ms_info)
151 {
152    assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));
153 
154    pipeline->msaa =
155       ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;
156 
157    v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
158       config.enable_forward_facing_primitive =
159          rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;
160 
161       config.enable_reverse_facing_primitive =
162          rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;
163 
164       /* Seems like the hardware is backwards regarding this setting... */
165       config.clockwise_primitives =
166          rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;
167 
168       config.enable_depth_offset = rs_info ? rs_info->depthBiasEnable: false;
169 
170       /* This is required to pass line rasterization tests in CTS while
171        * exposing, at least, a minimum of 4-bits of subpixel precision
172        * (the minimum requirement).
173        */
174       if (ls_info &&
175           ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
176          config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
177       else
178          config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;
179 
180       if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
181          config.direct3d_wireframe_triangles_mode = true;
182          config.direct3d_point_fill_mode =
183             rs_info->polygonMode == VK_POLYGON_MODE_POINT;
184       }
185 
186       /* diamond-exit rasterization does not suport oversample */
187       config.rasterizer_oversample_mode =
188          (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
189           pipeline->msaa) ? 1 : 0;
190 
191       /* From the Vulkan spec:
192        *
193        *   "Provoking Vertex:
194        *
195        *       The vertex in a primitive from which flat shaded attribute
196        *       values are taken. This is generally the “first” vertex in the
197        *       primitive, and depends on the primitive topology."
198        *
199        * First vertex is the Direct3D style for provoking vertex. OpenGL uses
200        * the last vertex by default.
201        */
202       if (pv_info) {
203          config.direct3d_provoking_vertex =
204             pv_info->provokingVertexMode ==
205                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
206       } else {
207          config.direct3d_provoking_vertex = true;
208       }
209 
210       config.blend_enable = pipeline->blend.enables != 0;
211 
212       /* Disable depth/stencil if we don't have a D/S attachment */
213       bool has_ds_attachment =
214          pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;
215 
216       if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
217          config.z_updates_enable = ds_info->depthWriteEnable;
218          config.depth_test_function = ds_info->depthCompareOp;
219       } else {
220          config.depth_test_function = VK_COMPARE_OP_ALWAYS;
221       }
222 
223       /* EZ state will be updated at draw time based on bound pipeline state */
224       config.early_z_updates_enable = false;
225       config.early_z_enable = false;
226 
227       config.stencil_enable =
228          ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;
229 
230       pipeline->z_updates_enable = config.z_updates_enable;
231    };
232 }
233 
234 static uint32_t
translate_stencil_op(enum pipe_stencil_op op)235 translate_stencil_op(enum pipe_stencil_op op)
236 {
237    switch (op) {
238    case VK_STENCIL_OP_KEEP:
239       return V3D_STENCIL_OP_KEEP;
240    case VK_STENCIL_OP_ZERO:
241       return V3D_STENCIL_OP_ZERO;
242    case VK_STENCIL_OP_REPLACE:
243       return V3D_STENCIL_OP_REPLACE;
244    case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
245       return V3D_STENCIL_OP_INCR;
246    case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
247       return V3D_STENCIL_OP_DECR;
248    case VK_STENCIL_OP_INVERT:
249       return V3D_STENCIL_OP_INVERT;
250    case VK_STENCIL_OP_INCREMENT_AND_WRAP:
251       return V3D_STENCIL_OP_INCWRAP;
252    case VK_STENCIL_OP_DECREMENT_AND_WRAP:
253       return V3D_STENCIL_OP_DECWRAP;
254    default:
255       unreachable("bad stencil op");
256    }
257 }
258 
259 static void
pack_single_stencil_cfg(struct v3dv_pipeline * pipeline,uint8_t * stencil_cfg,bool is_front,bool is_back,const VkStencilOpState * stencil_state)260 pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
261                         uint8_t *stencil_cfg,
262                         bool is_front,
263                         bool is_back,
264                         const VkStencilOpState *stencil_state)
265 {
266    /* From the Vulkan spec:
267     *
268     *   "Reference is an integer reference value that is used in the unsigned
269     *    stencil comparison. The reference value used by stencil comparison
270     *    must be within the range [0,2^s-1] , where s is the number of bits in
271     *    the stencil framebuffer attachment, otherwise the reference value is
272     *    considered undefined."
273     *
274     * In our case, 's' is always 8, so we clamp to that to prevent our packing
275     * functions to assert in debug mode if they see larger values.
276     *
277     * If we have dynamic state we need to make sure we set the corresponding
278     * state bits to 0, since cl_emit_with_prepacked ORs the new value with
279     * the old.
280     */
281    const uint8_t write_mask =
282       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
283          0 : stencil_state->writeMask & 0xff;
284 
285    const uint8_t compare_mask =
286       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
287          0 : stencil_state->compareMask & 0xff;
288 
289    const uint8_t reference =
290       pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
291          0 : stencil_state->reference & 0xff;
292 
293    v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
294       config.front_config = is_front;
295       config.back_config = is_back;
296       config.stencil_write_mask = write_mask;
297       config.stencil_test_mask = compare_mask;
298       config.stencil_test_function = stencil_state->compareOp;
299       config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
300       config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
301       config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
302       config.stencil_ref_value = reference;
303    }
304 }
305 
306 static void
pack_stencil_cfg(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info)307 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
308                  const VkPipelineDepthStencilStateCreateInfo *ds_info)
309 {
310    assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
311 
312    if (!ds_info || !ds_info->stencilTestEnable)
313       return;
314 
315    if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
316       return;
317 
318    const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
319                                            V3DV_DYNAMIC_STENCIL_WRITE_MASK |
320                                            V3DV_DYNAMIC_STENCIL_REFERENCE;
321 
322 
323    /* If front != back or we have dynamic stencil state we can't emit a single
324     * packet for both faces.
325     */
326    bool needs_front_and_back = false;
327    if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
328        memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
329       needs_front_and_back = true;
330 
331    /* If the front and back configurations are the same we can emit both with
332     * a single packet.
333     */
334    pipeline->emit_stencil_cfg[0] = true;
335    if (!needs_front_and_back) {
336       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
337                               true, true, &ds_info->front);
338    } else {
339       pipeline->emit_stencil_cfg[1] = true;
340       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
341                               true, false, &ds_info->front);
342       pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
343                               false, true, &ds_info->back);
344    }
345 }
346 
347 void
v3dX(pipeline_pack_state)348 v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
349                           const VkPipelineColorBlendStateCreateInfo *cb_info,
350                           const VkPipelineDepthStencilStateCreateInfo *ds_info,
351                           const VkPipelineRasterizationStateCreateInfo *rs_info,
352                           const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
353                           const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
354                           const VkPipelineMultisampleStateCreateInfo *ms_info)
355 {
356    pack_blend(pipeline, cb_info);
357    pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
358    pack_stencil_cfg(pipeline, ds_info);
359 }
360 
361 static void
pack_shader_state_record(struct v3dv_pipeline * pipeline)362 pack_shader_state_record(struct v3dv_pipeline *pipeline)
363 {
364    assert(sizeof(pipeline->shader_state_record) ==
365           cl_packet_length(GL_SHADER_STATE_RECORD));
366 
367    struct v3d_fs_prog_data *prog_data_fs =
368       pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;
369 
370    struct v3d_vs_prog_data *prog_data_vs =
371       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
372 
373    struct v3d_vs_prog_data *prog_data_vs_bin =
374       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;
375 
376 
377    /* Note: we are not packing addresses, as we need the job (see
378     * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
379     * point as they depend on dynamic info that can be set after create the
380     * pipeline (like viewport), . Would need to be filled later, so we are
381     * doing a partial prepacking.
382     */
383    v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
384       shader.enable_clipping = true;
385 
386       if (!pipeline->has_gs) {
387          shader.point_size_in_shaded_vertex_data =
388             pipeline->topology == PIPE_PRIM_POINTS;
389       } else {
390          struct v3d_gs_prog_data *prog_data_gs =
391             pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
392          shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
393       }
394 
395       /* Must be set if the shader modifies Z, discards, or modifies
396        * the sample mask.  For any of these cases, the fragment
397        * shader needs to write the Z value (even just discards).
398        */
399       shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;
400 
401       /* Set if the EZ test must be disabled (due to shader side
402        * effects and the early_z flag not being present in the
403        * shader).
404        */
405       shader.turn_off_early_z_test = prog_data_fs->disable_ez;
406 
407       shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
408          prog_data_fs->uses_center_w;
409 
410       /* The description for gl_SampleID states that if a fragment shader reads
411        * it, then we should automatically activate per-sample shading. However,
412        * the Vulkan spec also states that if a framebuffer has no attachments:
413        *
414        *    "The subpass continues to use the width, height, and layers of the
415        *     framebuffer to define the dimensions of the rendering area, and the
416        *     rasterizationSamples from each pipeline’s
417        *     VkPipelineMultisampleStateCreateInfo to define the number of
418        *     samples used in rasterization multisample rasterization."
419        *
420        * So in this scenario, if the pipeline doesn't enable multiple samples
421        * but the fragment shader accesses gl_SampleID we would be requested
422        * to do per-sample shading in single sample rasterization mode, which
423        * is pointless, so just disable it in that case.
424        */
425       shader.enable_sample_rate_shading =
426          pipeline->sample_rate_shading ||
427          (pipeline->msaa && prog_data_fs->force_per_sample_msaa);
428 
429       shader.any_shader_reads_hardware_written_primitive_id = false;
430 
431       shader.do_scoreboard_wait_on_first_thread_switch =
432          prog_data_fs->lock_scoreboard_on_first_thrsw;
433       shader.disable_implicit_point_line_varyings =
434          !prog_data_fs->uses_implicit_point_line_varyings;
435 
436       shader.number_of_varyings_in_fragment_shader =
437          prog_data_fs->num_inputs;
438 
439       shader.coordinate_shader_propagate_nans = true;
440       shader.vertex_shader_propagate_nans = true;
441       shader.fragment_shader_propagate_nans = true;
442 
443       /* Note: see previous note about adresses */
444       /* shader.coordinate_shader_code_address */
445       /* shader.vertex_shader_code_address */
446       /* shader.fragment_shader_code_address */
447 
448       /* FIXME: Use combined input/output size flag in the common case (also
449        * on v3d, see v3dx_draw).
450        */
451       shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
452          prog_data_vs_bin->separate_segments;
453       shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
454          prog_data_vs->separate_segments;
455 
456       shader.coordinate_shader_input_vpm_segment_size =
457          prog_data_vs_bin->separate_segments ?
458          prog_data_vs_bin->vpm_input_size : 1;
459       shader.vertex_shader_input_vpm_segment_size =
460          prog_data_vs->separate_segments ?
461          prog_data_vs->vpm_input_size : 1;
462 
463       shader.coordinate_shader_output_vpm_segment_size =
464          prog_data_vs_bin->vpm_output_size;
465       shader.vertex_shader_output_vpm_segment_size =
466          prog_data_vs->vpm_output_size;
467 
468       /* Note: see previous note about adresses */
469       /* shader.coordinate_shader_uniforms_address */
470       /* shader.vertex_shader_uniforms_address */
471       /* shader.fragment_shader_uniforms_address */
472 
473       shader.min_coord_shader_input_segments_required_in_play =
474          pipeline->vpm_cfg_bin.As;
475       shader.min_vertex_shader_input_segments_required_in_play =
476          pipeline->vpm_cfg.As;
477 
478       shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
479          pipeline->vpm_cfg_bin.Ve;
480       shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
481          pipeline->vpm_cfg.Ve;
482 
483       shader.coordinate_shader_4_way_threadable =
484          prog_data_vs_bin->base.threads == 4;
485       shader.vertex_shader_4_way_threadable =
486          prog_data_vs->base.threads == 4;
487       shader.fragment_shader_4_way_threadable =
488          prog_data_fs->base.threads == 4;
489 
490       shader.coordinate_shader_start_in_final_thread_section =
491          prog_data_vs_bin->base.single_seg;
492       shader.vertex_shader_start_in_final_thread_section =
493          prog_data_vs->base.single_seg;
494       shader.fragment_shader_start_in_final_thread_section =
495          prog_data_fs->base.single_seg;
496 
497       shader.vertex_id_read_by_coordinate_shader =
498          prog_data_vs_bin->uses_vid;
499       shader.base_instance_id_read_by_coordinate_shader =
500          prog_data_vs_bin->uses_biid;
501       shader.instance_id_read_by_coordinate_shader =
502          prog_data_vs_bin->uses_iid;
503       shader.vertex_id_read_by_vertex_shader =
504          prog_data_vs->uses_vid;
505       shader.base_instance_id_read_by_vertex_shader =
506          prog_data_vs->uses_biid;
507       shader.instance_id_read_by_vertex_shader =
508          prog_data_vs->uses_iid;
509 
510       /* Note: see previous note about adresses */
511       /* shader.address_of_default_attribute_values */
512    }
513 }
514 
515 static void
pack_vcm_cache_size(struct v3dv_pipeline * pipeline)516 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
517 {
518    assert(sizeof(pipeline->vcm_cache_size) ==
519           cl_packet_length(VCM_CACHE_SIZE));
520 
521    v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
522       vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
523       vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
524    }
525 }
526 
527 /* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
528 static uint8_t
get_attr_type(const struct util_format_description * desc)529 get_attr_type(const struct util_format_description *desc)
530 {
531    uint32_t r_size = desc->channel[0].size;
532    uint8_t attr_type = ATTRIBUTE_FLOAT;
533 
534    switch (desc->channel[0].type) {
535    case UTIL_FORMAT_TYPE_FLOAT:
536       if (r_size == 32) {
537          attr_type = ATTRIBUTE_FLOAT;
538       } else {
539          assert(r_size == 16);
540          attr_type = ATTRIBUTE_HALF_FLOAT;
541       }
542       break;
543 
544    case UTIL_FORMAT_TYPE_SIGNED:
545    case UTIL_FORMAT_TYPE_UNSIGNED:
546       switch (r_size) {
547       case 32:
548          attr_type = ATTRIBUTE_INT;
549          break;
550       case 16:
551          attr_type = ATTRIBUTE_SHORT;
552          break;
553       case 10:
554          attr_type = ATTRIBUTE_INT2_10_10_10;
555          break;
556       case 8:
557          attr_type = ATTRIBUTE_BYTE;
558          break;
559       default:
560          fprintf(stderr,
561                  "format %s unsupported\n",
562                  desc->name);
563          attr_type = ATTRIBUTE_BYTE;
564          abort();
565       }
566       break;
567 
568    default:
569       fprintf(stderr,
570               "format %s unsupported\n",
571               desc->name);
572       abort();
573    }
574 
575    return attr_type;
576 }
577 
578 static void
pack_shader_state_attribute_record(struct v3dv_pipeline * pipeline,uint32_t index,const VkVertexInputAttributeDescription * vi_desc)579 pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
580                                    uint32_t index,
581                                    const VkVertexInputAttributeDescription *vi_desc)
582 {
583    const uint32_t packet_length =
584       cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
585 
586    const struct util_format_description *desc =
587       vk_format_description(vi_desc->format);
588 
589    uint32_t binding = vi_desc->binding;
590 
591    v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
592              GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
593 
594       /* vec_size == 0 means 4 */
595       attr.vec_size = desc->nr_channels & 3;
596       attr.signed_int_type = (desc->channel[0].type ==
597                               UTIL_FORMAT_TYPE_SIGNED);
598       attr.normalized_int_type = desc->channel[0].normalized;
599       attr.read_as_int_uint = desc->channel[0].pure_integer;
600 
601       attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
602                                    0xffff);
603       attr.stride = pipeline->vb[binding].stride;
604       attr.type = get_attr_type(desc);
605    }
606 }
607 
608 void
v3dX(pipeline_pack_compile_state)609 v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
610                                   const VkPipelineVertexInputStateCreateInfo *vi_info,
611                                   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
612 {
613    pack_shader_state_record(pipeline);
614    pack_vcm_cache_size(pipeline);
615 
616    pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
617    for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
618       const VkVertexInputBindingDescription *desc =
619          &vi_info->pVertexBindingDescriptions[i];
620 
621       pipeline->vb[desc->binding].stride = desc->stride;
622       pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
623    }
624 
625    if (vd_info) {
626       for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
627          const VkVertexInputBindingDivisorDescriptionEXT *desc =
628             &vd_info->pVertexBindingDivisors[i];
629 
630          pipeline->vb[desc->binding].instance_divisor = desc->divisor;
631       }
632    }
633 
634    pipeline->va_count = 0;
635    struct v3d_vs_prog_data *prog_data_vs =
636       pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
637 
638    for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
639       const VkVertexInputAttributeDescription *desc =
640          &vi_info->pVertexAttributeDescriptions[i];
641       uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
642 
643       /* We use a custom driver_location_map instead of
644        * nir_find_variable_with_location because if we were able to get the
645        * shader variant from the cache, we would not have the nir shader
646        * available.
647        */
648       uint32_t driver_location =
649          prog_data_vs->driver_location_map[location];
650 
651       if (driver_location != -1) {
652          assert(driver_location < MAX_VERTEX_ATTRIBS);
653          pipeline->va[driver_location].offset = desc->offset;
654          pipeline->va[driver_location].binding = desc->binding;
655          pipeline->va[driver_location].vk_format = desc->format;
656 
657          pack_shader_state_attribute_record(pipeline, driver_location, desc);
658 
659          pipeline->va_count++;
660       }
661    }
662 }
663