1 /*
2 * Copyright © 2021 Raspberry Pi Ltd
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "v3dv_private.h"
25 #include "broadcom/common/v3d_macros.h"
26 #include "broadcom/cle/v3dx_pack.h"
27 #include "broadcom/compiler/v3d_compiler.h"
28
29 static uint8_t
blend_factor(VkBlendFactor factor,bool dst_alpha_one,bool * needs_constants)30 blend_factor(VkBlendFactor factor, bool dst_alpha_one, bool *needs_constants)
31 {
32 switch (factor) {
33 case VK_BLEND_FACTOR_ZERO:
34 case VK_BLEND_FACTOR_ONE:
35 case VK_BLEND_FACTOR_SRC_COLOR:
36 case VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR:
37 case VK_BLEND_FACTOR_DST_COLOR:
38 case VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR:
39 case VK_BLEND_FACTOR_SRC_ALPHA:
40 case VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA:
41 case VK_BLEND_FACTOR_SRC_ALPHA_SATURATE:
42 return factor;
43 case VK_BLEND_FACTOR_CONSTANT_COLOR:
44 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR:
45 case VK_BLEND_FACTOR_CONSTANT_ALPHA:
46 case VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA:
47 *needs_constants = true;
48 return factor;
49 case VK_BLEND_FACTOR_DST_ALPHA:
50 return dst_alpha_one ? V3D_BLEND_FACTOR_ONE :
51 V3D_BLEND_FACTOR_DST_ALPHA;
52 case VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA:
53 return dst_alpha_one ? V3D_BLEND_FACTOR_ZERO :
54 V3D_BLEND_FACTOR_INV_DST_ALPHA;
55 case VK_BLEND_FACTOR_SRC1_COLOR:
56 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR:
57 case VK_BLEND_FACTOR_SRC1_ALPHA:
58 case VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA:
59 unreachable("Invalid blend factor: dual source blending not supported.");
60 default:
61 unreachable("Unknown blend factor.");
62 }
63 }
64
65 static void
pack_blend(struct v3dv_pipeline * pipeline,const VkPipelineColorBlendStateCreateInfo * cb_info)66 pack_blend(struct v3dv_pipeline *pipeline,
67 const VkPipelineColorBlendStateCreateInfo *cb_info)
68 {
69 /* By default, we are not enabling blending and all color channel writes are
70 * enabled. Color write enables are independent of whether blending is
71 * enabled or not.
72 *
73 * Vulkan specifies color write masks so that bits set correspond to
74 * enabled channels. Our hardware does it the other way around.
75 */
76 pipeline->blend.enables = 0;
77 pipeline->blend.color_write_masks = 0; /* All channels enabled */
78
79 if (!cb_info)
80 return;
81
82 assert(pipeline->subpass);
83 if (pipeline->subpass->color_count == 0)
84 return;
85
86 assert(pipeline->subpass->color_count == cb_info->attachmentCount);
87 pipeline->blend.needs_color_constants = false;
88 uint32_t color_write_masks = 0;
89 for (uint32_t i = 0; i < pipeline->subpass->color_count; i++) {
90 const VkPipelineColorBlendAttachmentState *b_state =
91 &cb_info->pAttachments[i];
92
93 uint32_t attachment_idx =
94 pipeline->subpass->color_attachments[i].attachment;
95 if (attachment_idx == VK_ATTACHMENT_UNUSED)
96 continue;
97
98 color_write_masks |= (~b_state->colorWriteMask & 0xf) << (4 * i);
99
100 if (!b_state->blendEnable)
101 continue;
102
103 VkAttachmentDescription2 *desc =
104 &pipeline->pass->attachments[attachment_idx].desc;
105 const struct v3dv_format *format = v3dX(get_format)(desc->format);
106
107 /* We only do blending with render pass attachments, so we should not have
108 * multiplanar images here
109 */
110 assert(format->plane_count == 1);
111 bool dst_alpha_one = (format->planes[0].swizzle[3] == PIPE_SWIZZLE_1);
112
113 uint8_t rt_mask = 1 << i;
114 pipeline->blend.enables |= rt_mask;
115
116 v3dvx_pack(pipeline->blend.cfg[i], BLEND_CFG, config) {
117 config.render_target_mask = rt_mask;
118
119 config.color_blend_mode = b_state->colorBlendOp;
120 config.color_blend_dst_factor =
121 blend_factor(b_state->dstColorBlendFactor, dst_alpha_one,
122 &pipeline->blend.needs_color_constants);
123 config.color_blend_src_factor =
124 blend_factor(b_state->srcColorBlendFactor, dst_alpha_one,
125 &pipeline->blend.needs_color_constants);
126
127 config.alpha_blend_mode = b_state->alphaBlendOp;
128 config.alpha_blend_dst_factor =
129 blend_factor(b_state->dstAlphaBlendFactor, dst_alpha_one,
130 &pipeline->blend.needs_color_constants);
131 config.alpha_blend_src_factor =
132 blend_factor(b_state->srcAlphaBlendFactor, dst_alpha_one,
133 &pipeline->blend.needs_color_constants);
134 }
135 }
136
137 pipeline->blend.color_write_masks = color_write_masks;
138 }
139
/* This requires that pack_blend() had been called before so we can set
 * the overall blend enable bit in the CFG_BITS packet.
 *
 * All the create-info pointers may be NULL (e.g. states that are not used
 * by the pipeline); each field below falls back to a default in that case.
 */
static void
pack_cfg_bits(struct v3dv_pipeline *pipeline,
              const VkPipelineDepthStencilStateCreateInfo *ds_info,
              const VkPipelineRasterizationStateCreateInfo *rs_info,
              const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
              const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
              const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   assert(sizeof(pipeline->cfg_bits) == cl_packet_length(CFG_BITS));

   pipeline->msaa =
      ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;

   v3dvx_pack(pipeline->cfg_bits, CFG_BITS, config) {
      /* The hardware has separate enables per facing; Vulkan's cull mask
       * tells us which facings to drop, so each enable is the negation of
       * the corresponding cull bit.
       */
      config.enable_forward_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_FRONT_BIT) : false;

      config.enable_reverse_facing_primitive =
         rs_info ? !(rs_info->cullMode & VK_CULL_MODE_BACK_BIT) : false;

      /* Seems like the hardware is backwards regarding this setting... */
      config.clockwise_primitives =
         rs_info ? rs_info->frontFace == VK_FRONT_FACE_COUNTER_CLOCKWISE : false;

      /* Even if rs_info->depthBiasEnabled is true, we can decide to not
       * enable it, like if there isn't a depth/stencil attachment with the
       * pipeline.
       */
      config.enable_depth_offset = pipeline->depth_bias.enabled;

      /* This is required to pass line rasterization tests in CTS while
       * exposing, at least, a minimum of 4-bits of subpixel precision
       * (the minimum requirement).
       */
      if (ls_info &&
          ls_info->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT)
         config.line_rasterization = V3D_LINE_RASTERIZATION_DIAMOND_EXIT;
      else
         config.line_rasterization = V3D_LINE_RASTERIZATION_PERP_END_CAPS;

      /* Non-fill polygon modes are implemented by drawing lines/points with
       * the Direct3D-style wireframe mode.
       */
      if (rs_info && rs_info->polygonMode != VK_POLYGON_MODE_FILL) {
         config.direct3d_wireframe_triangles_mode = true;
         config.direct3d_point_fill_mode =
            rs_info->polygonMode == VK_POLYGON_MODE_POINT;
      }

      /* diamond-exit rasterization does not support oversample */
      config.rasterizer_oversample_mode =
         (config.line_rasterization == V3D_LINE_RASTERIZATION_PERP_END_CAPS &&
          pipeline->msaa) ? 1 : 0;

      /* From the Vulkan spec:
       *
       *   "Provoking Vertex:
       *
       *       The vertex in a primitive from which flat shaded attribute
       *       values are taken. This is generally the “first” vertex in the
       *       primitive, and depends on the primitive topology."
       *
       * First vertex is the Direct3D style for provoking vertex. OpenGL uses
       * the last vertex by default.
       */
      if (pv_info) {
         config.direct3d_provoking_vertex =
            pv_info->provokingVertexMode ==
               VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT;
      } else {
         config.direct3d_provoking_vertex = true;
      }

      /* Relies on pack_blend() having run first: blend.enables has one bit
       * per render target with blending enabled.
       */
      config.blend_enable = pipeline->blend.enables != 0;

      /* Disable depth/stencil if we don't have a D/S attachment */
      bool has_ds_attachment =
         pipeline->subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED;

      if (ds_info && ds_info->depthTestEnable && has_ds_attachment) {
         config.z_updates_enable = ds_info->depthWriteEnable;
         config.depth_test_function = ds_info->depthCompareOp;
      } else {
         /* With the depth test disabled, the compare function must still be
          * programmed to always pass.
          */
         config.depth_test_function = VK_COMPARE_OP_ALWAYS;
      }

      config.stencil_enable =
         ds_info ? ds_info->stencilTestEnable && has_ds_attachment: false;

      /* Cache the effective Z-write enable on the pipeline for later use
       * (mirrors what was just packed into CFG_BITS).
       */
      pipeline->z_updates_enable = config.z_updates_enable;

#if V3D_VERSION >= 71
      /* From the Vulkan spec:
       *
       *    "depthClampEnable controls whether to clamp the fragment’s depth
       *     values as described in Depth Test. If the pipeline is not created
       *     with VkPipelineRasterizationDepthClipStateCreateInfoEXT present
       *     then enabling depth clamp will also disable clipping primitives to
       *     the z planes of the frustrum as described in Primitive Clipping.
       *     Otherwise depth clipping is controlled by the state set in
       *     VkPipelineRasterizationDepthClipStateCreateInfoEXT."
       */
      bool z_clamp_enable = rs_info && rs_info->depthClampEnable;
      bool z_clip_enable = false;
      const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
         rs_info ? vk_find_struct_const(rs_info->pNext,
                                        PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT) :
                   NULL;
      if (clip_info)
         z_clip_enable = clip_info->depthClipEnable;
      else if (!z_clamp_enable)
         z_clip_enable = true;

      if (z_clip_enable) {
         config.z_clipping_mode = pipeline->negative_one_to_one ?
            V3D_Z_CLIP_MODE_MIN_ONE_TO_ONE : V3D_Z_CLIP_MODE_ZERO_TO_ONE;
      } else {
         config.z_clipping_mode = V3D_Z_CLIP_MODE_NONE;
      }

      config.z_clamp_mode = z_clamp_enable;

      config.depth_bounds_test_enable =
         ds_info && ds_info->depthBoundsTestEnable && has_ds_attachment;
#endif
   };
}
267
268 static uint32_t
translate_stencil_op(VkStencilOp op)269 translate_stencil_op(VkStencilOp op)
270 {
271 switch (op) {
272 case VK_STENCIL_OP_KEEP:
273 return V3D_STENCIL_OP_KEEP;
274 case VK_STENCIL_OP_ZERO:
275 return V3D_STENCIL_OP_ZERO;
276 case VK_STENCIL_OP_REPLACE:
277 return V3D_STENCIL_OP_REPLACE;
278 case VK_STENCIL_OP_INCREMENT_AND_CLAMP:
279 return V3D_STENCIL_OP_INCR;
280 case VK_STENCIL_OP_DECREMENT_AND_CLAMP:
281 return V3D_STENCIL_OP_DECR;
282 case VK_STENCIL_OP_INVERT:
283 return V3D_STENCIL_OP_INVERT;
284 case VK_STENCIL_OP_INCREMENT_AND_WRAP:
285 return V3D_STENCIL_OP_INCWRAP;
286 case VK_STENCIL_OP_DECREMENT_AND_WRAP:
287 return V3D_STENCIL_OP_DECWRAP;
288 default:
289 unreachable("bad stencil op");
290 }
291 }
292
293 static void
pack_single_stencil_cfg(struct v3dv_pipeline * pipeline,uint8_t * stencil_cfg,bool is_front,bool is_back,const VkStencilOpState * stencil_state)294 pack_single_stencil_cfg(struct v3dv_pipeline *pipeline,
295 uint8_t *stencil_cfg,
296 bool is_front,
297 bool is_back,
298 const VkStencilOpState *stencil_state)
299 {
300 /* From the Vulkan spec:
301 *
302 * "Reference is an integer reference value that is used in the unsigned
303 * stencil comparison. The reference value used by stencil comparison
304 * must be within the range [0,2^s-1] , where s is the number of bits in
305 * the stencil framebuffer attachment, otherwise the reference value is
306 * considered undefined."
307 *
308 * In our case, 's' is always 8, so we clamp to that to prevent our packing
309 * functions to assert in debug mode if they see larger values.
310 *
311 * If we have dynamic state we need to make sure we set the corresponding
312 * state bits to 0, since cl_emit_with_prepacked ORs the new value with
313 * the old.
314 */
315 const uint8_t write_mask =
316 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_WRITE_MASK ?
317 0 : stencil_state->writeMask & 0xff;
318
319 const uint8_t compare_mask =
320 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
321 0 : stencil_state->compareMask & 0xff;
322
323 const uint8_t reference =
324 pipeline->dynamic_state.mask & V3DV_DYNAMIC_STENCIL_COMPARE_MASK ?
325 0 : stencil_state->reference & 0xff;
326
327 v3dvx_pack(stencil_cfg, STENCIL_CFG, config) {
328 config.front_config = is_front;
329 config.back_config = is_back;
330 config.stencil_write_mask = write_mask;
331 config.stencil_test_mask = compare_mask;
332 config.stencil_test_function = stencil_state->compareOp;
333 config.stencil_pass_op = translate_stencil_op(stencil_state->passOp);
334 config.depth_test_fail_op = translate_stencil_op(stencil_state->depthFailOp);
335 config.stencil_test_fail_op = translate_stencil_op(stencil_state->failOp);
336 config.stencil_ref_value = reference;
337 }
338 }
339
340 static void
pack_stencil_cfg(struct v3dv_pipeline * pipeline,const VkPipelineDepthStencilStateCreateInfo * ds_info)341 pack_stencil_cfg(struct v3dv_pipeline *pipeline,
342 const VkPipelineDepthStencilStateCreateInfo *ds_info)
343 {
344 assert(sizeof(pipeline->stencil_cfg) == 2 * cl_packet_length(STENCIL_CFG));
345
346 if (!ds_info || !ds_info->stencilTestEnable)
347 return;
348
349 if (pipeline->subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)
350 return;
351
352 const uint32_t dynamic_stencil_states = V3DV_DYNAMIC_STENCIL_COMPARE_MASK |
353 V3DV_DYNAMIC_STENCIL_WRITE_MASK |
354 V3DV_DYNAMIC_STENCIL_REFERENCE;
355
356
357 /* If front != back or we have dynamic stencil state we can't emit a single
358 * packet for both faces.
359 */
360 bool needs_front_and_back = false;
361 if ((pipeline->dynamic_state.mask & dynamic_stencil_states) ||
362 memcmp(&ds_info->front, &ds_info->back, sizeof(ds_info->front)))
363 needs_front_and_back = true;
364
365 /* If the front and back configurations are the same we can emit both with
366 * a single packet.
367 */
368 pipeline->emit_stencil_cfg[0] = true;
369 if (!needs_front_and_back) {
370 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
371 true, true, &ds_info->front);
372 } else {
373 pipeline->emit_stencil_cfg[1] = true;
374 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[0],
375 true, false, &ds_info->front);
376 pack_single_stencil_cfg(pipeline, pipeline->stencil_cfg[1],
377 false, true, &ds_info->back);
378 }
379 }
380
/* Prepacks the pipeline state that only depends on the pipeline create-info
 * state (no shader compile results required).
 */
void
v3dX(pipeline_pack_state)(struct v3dv_pipeline *pipeline,
                          const VkPipelineColorBlendStateCreateInfo *cb_info,
                          const VkPipelineDepthStencilStateCreateInfo *ds_info,
                          const VkPipelineRasterizationStateCreateInfo *rs_info,
                          const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_info,
                          const VkPipelineRasterizationLineStateCreateInfoEXT *ls_info,
                          const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   /* Order matters: pack_cfg_bits() reads pipeline->blend.enables, which is
    * filled in by pack_blend(), to set the blend enable bit in CFG_BITS.
    */
   pack_blend(pipeline, cb_info);
   pack_cfg_bits(pipeline, ds_info, rs_info, pv_info, ls_info, ms_info);
   pack_stencil_cfg(pipeline, ds_info);
}
394
/* Prepacks the GL_SHADER_STATE_RECORD from the compiled vertex (render and
 * bin variants), geometry and fragment shader prog_data. Address fields and
 * uniforms are left out and must be filled in at draw time (see below).
 */
static void
pack_shader_state_record(struct v3dv_pipeline *pipeline)
{
   assert(sizeof(pipeline->shader_state_record) >=
          cl_packet_length(GL_SHADER_STATE_RECORD));

   struct v3d_fs_prog_data *prog_data_fs =
      pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]->prog_data.fs;

   struct v3d_vs_prog_data *prog_data_vs =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;

   struct v3d_vs_prog_data *prog_data_vs_bin =
      pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]->prog_data.vs;


   /* Note: we are not packing addresses, as we need the job (see
    * cl_pack_emit_reloc). Additionally uniforms can't be filled up at this
    * point as they depend on dynamic info that can be set after creating the
    * pipeline (like viewport). They would need to be filled later, so we are
    * doing a partial prepacking.
    */
   v3dvx_pack(pipeline->shader_state_record, GL_SHADER_STATE_RECORD, shader) {
      shader.enable_clipping = true;

      /* Without a geometry shader, point size comes from the vertex stage
       * when drawing points; with one, it depends on whether the GS writes
       * gl_PointSize.
       */
      if (!pipeline->has_gs) {
         shader.point_size_in_shaded_vertex_data =
            pipeline->topology == MESA_PRIM_POINTS;
      } else {
         struct v3d_gs_prog_data *prog_data_gs =
            pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]->prog_data.gs;
         shader.point_size_in_shaded_vertex_data = prog_data_gs->writes_psiz;
      }

      /* Must be set if the shader modifies Z, discards, or modifies
       * the sample mask. For any of these cases, the fragment
       * shader needs to write the Z value (even just discards).
       */
      shader.fragment_shader_does_z_writes = prog_data_fs->writes_z;

      /* Set if the EZ test must be disabled (due to shader side
       * effects and the early_z flag not being present in the
       * shader).
       */
      shader.turn_off_early_z_test = prog_data_fs->disable_ez;

      shader.fragment_shader_uses_real_pixel_centre_w_in_addition_to_centroid_w2 =
         prog_data_fs->uses_center_w;

      /* The description for gl_SampleID states that if a fragment shader reads
       * it, then we should automatically activate per-sample shading. However,
       * the Vulkan spec also states that if a framebuffer has no attachments:
       *
       *    "The subpass continues to use the width, height, and layers of the
       *     framebuffer to define the dimensions of the rendering area, and the
       *     rasterizationSamples from each pipeline’s
       *     VkPipelineMultisampleStateCreateInfo to define the number of
       *     samples used in rasterization multisample rasterization."
       *
       * So in this scenario, if the pipeline doesn't enable multiple samples
       * but the fragment shader accesses gl_SampleID we would be requested
       * to do per-sample shading in single sample rasterization mode, which
       * is pointless, so just disable it in that case.
       */
      shader.enable_sample_rate_shading =
         pipeline->sample_rate_shading ||
         (pipeline->msaa && prog_data_fs->force_per_sample_msaa);

      shader.any_shader_reads_hardware_written_primitive_id = false;

      shader.do_scoreboard_wait_on_first_thread_switch =
         prog_data_fs->lock_scoreboard_on_first_thrsw;
      shader.disable_implicit_point_line_varyings =
         !prog_data_fs->uses_implicit_point_line_varyings;

      shader.number_of_varyings_in_fragment_shader =
         prog_data_fs->num_inputs;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_code_address */
      /* shader.vertex_shader_code_address */
      /* shader.fragment_shader_code_address */

#if V3D_VERSION == 42
      shader.coordinate_shader_propagate_nans = true;
      shader.vertex_shader_propagate_nans = true;
      shader.fragment_shader_propagate_nans = true;

      /* FIXME: Use combined input/output size flag in the common case (also
       * on v3d, see v3dx_draw).
       */
      shader.coordinate_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs_bin->separate_segments;
      shader.vertex_shader_has_separate_input_and_output_vpm_blocks =
         prog_data_vs->separate_segments;
      shader.coordinate_shader_input_vpm_segment_size =
         prog_data_vs_bin->separate_segments ?
         prog_data_vs_bin->vpm_input_size : 1;
      shader.vertex_shader_input_vpm_segment_size =
         prog_data_vs->separate_segments ?
         prog_data_vs->vpm_input_size : 1;
#endif

      /* On V3D 7.1 there isn't a specific flag to set if we are using
       * shared/separate segments or not. We just set the value of
       * vpm_input_size to 0, and set output to the max needed. That should be
       * already properly set on prog_data_vs_bin
       */
#if V3D_VERSION == 71
      shader.coordinate_shader_input_vpm_segment_size =
         prog_data_vs_bin->vpm_input_size;
      shader.vertex_shader_input_vpm_segment_size =
         prog_data_vs->vpm_input_size;
#endif

      shader.coordinate_shader_output_vpm_segment_size =
         prog_data_vs_bin->vpm_output_size;
      shader.vertex_shader_output_vpm_segment_size =
         prog_data_vs->vpm_output_size;

      /* Note: see previous note about addresses */
      /* shader.coordinate_shader_uniforms_address */
      /* shader.vertex_shader_uniforms_address */
      /* shader.fragment_shader_uniforms_address */

      /* VPM configuration (As/Ve) computed at pipeline creation, for both
       * the binning (vpm_cfg_bin) and rendering (vpm_cfg) passes.
       */
      shader.min_coord_shader_input_segments_required_in_play =
         pipeline->vpm_cfg_bin.As;
      shader.min_vertex_shader_input_segments_required_in_play =
         pipeline->vpm_cfg.As;

      shader.min_coord_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg_bin.Ve;
      shader.min_vertex_shader_output_segments_required_in_play_in_addition_to_vcm_cache_size =
         pipeline->vpm_cfg.Ve;

      shader.coordinate_shader_4_way_threadable =
         prog_data_vs_bin->base.threads == 4;
      shader.vertex_shader_4_way_threadable =
         prog_data_vs->base.threads == 4;
      shader.fragment_shader_4_way_threadable =
         prog_data_fs->base.threads == 4;

      shader.coordinate_shader_start_in_final_thread_section =
         prog_data_vs_bin->base.single_seg;
      shader.vertex_shader_start_in_final_thread_section =
         prog_data_vs->base.single_seg;
      shader.fragment_shader_start_in_final_thread_section =
         prog_data_fs->base.single_seg;

      /* System-value usage flags for the bin (coordinate) and render
       * (vertex) variants of the vertex shader.
       */
      shader.vertex_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_vid;
      shader.base_instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_biid;
      shader.instance_id_read_by_coordinate_shader =
         prog_data_vs_bin->uses_iid;
      shader.vertex_id_read_by_vertex_shader =
         prog_data_vs->uses_vid;
      shader.base_instance_id_read_by_vertex_shader =
         prog_data_vs->uses_biid;
      shader.instance_id_read_by_vertex_shader =
         prog_data_vs->uses_iid;

      /* Note: see previous note about addresses */
      /* shader.address_of_default_attribute_values */
   }
}
561
562 static void
pack_vcm_cache_size(struct v3dv_pipeline * pipeline)563 pack_vcm_cache_size(struct v3dv_pipeline *pipeline)
564 {
565 assert(sizeof(pipeline->vcm_cache_size) ==
566 cl_packet_length(VCM_CACHE_SIZE));
567
568 v3dvx_pack(pipeline->vcm_cache_size, VCM_CACHE_SIZE, vcm) {
569 vcm.number_of_16_vertex_batches_for_binning = pipeline->vpm_cfg_bin.Vc;
570 vcm.number_of_16_vertex_batches_for_rendering = pipeline->vpm_cfg.Vc;
571 }
572 }
573
574 /* As defined on the GL_SHADER_STATE_ATTRIBUTE_RECORD */
575 static uint8_t
get_attr_type(const struct util_format_description * desc)576 get_attr_type(const struct util_format_description *desc)
577 {
578 uint32_t r_size = desc->channel[0].size;
579 uint8_t attr_type = ATTRIBUTE_FLOAT;
580
581 switch (desc->channel[0].type) {
582 case UTIL_FORMAT_TYPE_FLOAT:
583 if (r_size == 32) {
584 attr_type = ATTRIBUTE_FLOAT;
585 } else {
586 assert(r_size == 16);
587 attr_type = ATTRIBUTE_HALF_FLOAT;
588 }
589 break;
590
591 case UTIL_FORMAT_TYPE_SIGNED:
592 case UTIL_FORMAT_TYPE_UNSIGNED:
593 switch (r_size) {
594 case 32:
595 attr_type = ATTRIBUTE_INT;
596 break;
597 case 16:
598 attr_type = ATTRIBUTE_SHORT;
599 break;
600 case 10:
601 attr_type = ATTRIBUTE_INT2_10_10_10;
602 break;
603 case 8:
604 attr_type = ATTRIBUTE_BYTE;
605 break;
606 default:
607 fprintf(stderr,
608 "format %s unsupported\n",
609 desc->name);
610 attr_type = ATTRIBUTE_BYTE;
611 abort();
612 }
613 break;
614
615 default:
616 fprintf(stderr,
617 "format %s unsupported\n",
618 desc->name);
619 abort();
620 }
621
622 return attr_type;
623 }
624
625 static void
pack_shader_state_attribute_record(struct v3dv_pipeline * pipeline,uint32_t index,const VkVertexInputAttributeDescription * vi_desc)626 pack_shader_state_attribute_record(struct v3dv_pipeline *pipeline,
627 uint32_t index,
628 const VkVertexInputAttributeDescription *vi_desc)
629 {
630 const uint32_t packet_length =
631 cl_packet_length(GL_SHADER_STATE_ATTRIBUTE_RECORD);
632
633 const struct util_format_description *desc =
634 vk_format_description(vi_desc->format);
635
636 uint32_t binding = vi_desc->binding;
637
638 v3dvx_pack(&pipeline->vertex_attrs[index * packet_length],
639 GL_SHADER_STATE_ATTRIBUTE_RECORD, attr) {
640
641 /* vec_size == 0 means 4 */
642 attr.vec_size = desc->nr_channels & 3;
643 attr.signed_int_type = (desc->channel[0].type ==
644 UTIL_FORMAT_TYPE_SIGNED);
645 attr.normalized_int_type = desc->channel[0].normalized;
646 attr.read_as_int_uint = desc->channel[0].pure_integer;
647
648 attr.instance_divisor = MIN2(pipeline->vb[binding].instance_divisor,
649 0xffff);
650 attr.stride = pipeline->vb[binding].stride;
651 attr.type = get_attr_type(desc);
652 }
653 }
654
655 void
v3dX(pipeline_pack_compile_state)656 v3dX(pipeline_pack_compile_state)(struct v3dv_pipeline *pipeline,
657 const VkPipelineVertexInputStateCreateInfo *vi_info,
658 const VkPipelineVertexInputDivisorStateCreateInfoEXT *vd_info)
659 {
660 pack_shader_state_record(pipeline);
661 pack_vcm_cache_size(pipeline);
662
663 pipeline->vb_count = vi_info->vertexBindingDescriptionCount;
664 for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
665 const VkVertexInputBindingDescription *desc =
666 &vi_info->pVertexBindingDescriptions[i];
667
668 pipeline->vb[desc->binding].stride = desc->stride;
669 pipeline->vb[desc->binding].instance_divisor = desc->inputRate;
670 }
671
672 if (vd_info) {
673 for (uint32_t i = 0; i < vd_info->vertexBindingDivisorCount; i++) {
674 const VkVertexInputBindingDivisorDescriptionEXT *desc =
675 &vd_info->pVertexBindingDivisors[i];
676
677 pipeline->vb[desc->binding].instance_divisor = desc->divisor;
678 }
679 }
680
681 pipeline->va_count = 0;
682 struct v3d_vs_prog_data *prog_data_vs =
683 pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]->prog_data.vs;
684
685 for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
686 const VkVertexInputAttributeDescription *desc =
687 &vi_info->pVertexAttributeDescriptions[i];
688 uint32_t location = desc->location + VERT_ATTRIB_GENERIC0;
689
690 /* We use a custom driver_location_map instead of
691 * nir_find_variable_with_location because if we were able to get the
692 * shader variant from the cache, we would not have the nir shader
693 * available.
694 */
695 uint32_t driver_location =
696 prog_data_vs->driver_location_map[location];
697
698 if (driver_location != -1) {
699 assert(driver_location < MAX_VERTEX_ATTRIBS);
700 pipeline->va[driver_location].offset = desc->offset;
701 pipeline->va[driver_location].binding = desc->binding;
702 pipeline->va[driver_location].vk_format = desc->format;
703
704 pack_shader_state_attribute_record(pipeline, driver_location, desc);
705
706 pipeline->va_count++;
707 }
708 }
709 }
710
#if V3D_VERSION == 42
/* Returns true if any vertex attribute used by the pipeline has an integer
 * format.
 */
static bool
pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)
{
   bool found = false;
   for (uint8_t i = 0; !found && i < pipeline->va_count; i++)
      found = vk_format_is_int(pipeline->va[i].vk_format);
   return found;
}
#endif
722
/* Returns whether the pipeline requires a BO with default attribute values
 * (see create_default_attribute_values below). Only V3D 4.2 pipelines with
 * integer vertex attributes need one; on other versions this is never
 * required.
 */
bool
v3dX(pipeline_needs_default_attribute_values)(struct v3dv_pipeline *pipeline)
{
#if V3D_VERSION == 42
   return pipeline_has_integer_vertex_attrib(pipeline);
#endif

   return false;
}
732
733 /* @pipeline can be NULL. In that case we assume the most common case. For
734 * example, for v42 we assume in that case that all the attributes have a
735 * float format (we only create an all-float BO once and we reuse it with all
736 * float pipelines), otherwise we look at the actual type of each attribute
737 * used with the specific pipeline passed in.
738 */
739 struct v3dv_bo *
v3dX(create_default_attribute_values)740 v3dX(create_default_attribute_values)(struct v3dv_device *device,
741 struct v3dv_pipeline *pipeline)
742 {
743 #if V3D_VERSION >= 71
744 return NULL;
745 #endif
746
747 uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;
748 struct v3dv_bo *bo;
749
750 bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);
751
752 if (!bo) {
753 fprintf(stderr, "failed to allocate memory for the default "
754 "attribute values\n");
755 return NULL;
756 }
757
758 bool ok = v3dv_bo_map(device, bo, size);
759 if (!ok) {
760 fprintf(stderr, "failed to map default attribute values buffer\n");
761 return NULL;
762 }
763
764 uint32_t *attrs = bo->map;
765 uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;
766 for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {
767 attrs[i * 4 + 0] = 0;
768 attrs[i * 4 + 1] = 0;
769 attrs[i * 4 + 2] = 0;
770 VkFormat attr_format =
771 pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;
772 if (i < va_count && vk_format_is_int(attr_format)) {
773 attrs[i * 4 + 3] = 1;
774 } else {
775 attrs[i * 4 + 3] = fui(1.0);
776 }
777 }
778
779 v3dv_bo_unmap(device, bo);
780
781 return bo;
782 }
783