/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "common/intel_compute_slm.h"
#include "common/intel_genX_state_elk.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From the Broadwell spec, command reference structures, page 586:
    *    "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *    64-bit components are stored in the URB without any conversion. In
    *    this case, vertex elements must be written as 128 or 256 bits, with
    *    VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *    R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *    Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *    set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element,
    *    or Components 1-3 must be specified as VFCOMP_STORE_0 in order to
    *    output a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU
    *    requires Component 3 to be specified as VFCOMP_STORE_0 in order to
    *    output a 256-bit vertex element."
    */
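   /* A couple of worked examples of the rules below (the formats are just
    * illustrations; the results follow directly from the spec text above):
    *
    *    R64_PASSTHRU       -> { STORE_SRC, STORE_0, NOSTORE, NOSTORE }
    *                          (one 64-bit source padded to a 128-bit element)
    *    R64G64B64_PASSTHRU -> { STORE_SRC, STORE_SRC, STORE_SRC, STORE_0 }
    *                          (three 64-bit sources padded to 256 bits)
    */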
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128- or
       * 256-bit chunks, using VFCOMP_NOSTORE when not writing the chunk,
       * and VFCOMP_STORE_0 to pad the written chunk. */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware
       * restrictions (see the comment above). */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const struct vk_vertex_input_state *vi)
{
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

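   /* Each attribute normally needs one vertex element. 64-bit attributes
    * wide enough to occupy two attribute slots (dvec3/dvec4) set two bits in
    * elements_double but are still fed by a single element, hence the halved
    * correction below (this mirrors the DIV_ROUND_UP(..., 2) in the
    * per-attribute slot computation further down). The VF stage also
    * requires at least one valid VERTEX_ELEMENT_STATE entry, so the total is
    * clamped to 1 even when the VS reads no inputs.
    */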
   uint32_t elem_count = __builtin_popcount(elements) -
                         __builtin_popcount(elements_double) / 2;

   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->base.batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it, so we just
       * store a bunch of zeros.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   u_foreach_bit(a, vi->attributes_valid) {
      enum isl_format format = anv_get_isl_format(pipeline->base.device->info,
                                                  vi->attributes[a].format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);
      assume(format < ISL_NUM_FORMATS);

      uint32_t binding = vi->attributes[a].binding;
      assert(binding < MAX_VBS);

      if ((elements & (1 << a)) == 0)
         continue; /* Binding unused */

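      /* The element slot is the attribute's index among the bits set below
       * it, again counting each two-slot 64-bit attribute only once. For
       * example (hypothetical layout), if attributes 0 and 1 are the two
       * halves of one dvec4, attribute 2 lands in slot 1, not slot 2.
       */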
      uint32_t slot =
         __builtin_popcount(elements & ((1 << a) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << a) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = vi->attributes[a].binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = vi->attributes[a].offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GFX_VER >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing. On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         bool per_instance = pipeline->vb[binding].instanced;
         uint32_t divisor = pipeline->vb[binding].instance_divisor *
                            pipeline->instance_multiplier;

         vfi.InstancingEnable = per_instance;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate = per_instance ? divisor : 1;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well. Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GFX_VER >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = id_slot;
      }
#endif
   }

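   /* On gfx8+, VF_SGVS below makes the VF stage write VertexID and
    * InstanceID into components 2 and 3 of the SVGS element, which is why
    * the element above leaves those components as VFCOMP_STORE_0
    * placeholders there, while gfx7 selects VFCOMP_STORE_VID and
    * VFCOMP_STORE_IID in the element directly.
    */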
#if GFX_VER >= 8
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = device->info;
   struct intel_urb_config urb_cfg = {
      .size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
   };

   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        &urb_cfg, deref_block_size, &constrained);

#if GFX_VERx10 == 70
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = device->workaround_address;
   }
#endif

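   /* The 3DSTATE_URB_{VS,HS,DS,GS} packets share a layout and have
    * consecutive sub-opcodes, so rather than emitting four packet types we
    * emit 3DSTATE_URB_VS four times and bump the sub-opcode to retarget the
    * packet at each stage.
    */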
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = urb_cfg.start[i];
         urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
         urb.VSNumberofURBEntries = urb_cfg.entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct elk_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct elk_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GFX_VER >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   const struct intel_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   int first_slot = elk_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                        fs_input_map);
   assert(first_slot % 2 == 0);
   unsigned urb_entry_read_offset = first_slot / 2;
   int max_source_attr = 0;
   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
      int input_index = wm_prog_data->urb_setup[attr];

      assert(0 <= input_index);

      /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
       * VUE header
       */
      if (attr == VARYING_SLOT_VIEWPORT ||
          attr == VARYING_SLOT_LAYER ||
          attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it. It could be that it's a regular
          * varying read by the fragment shader but not written by the vertex
          * shader or it's gl_PrimitiveID. In the first case the value is
          * undefined, in the second it needs to be gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
         continue;
      }

      /* We have to subtract two slots to account for the URB entry output
       * read offset in the VS and GS stages.
       */
      const int source_attr = slot - 2 * urb_entry_read_offset;
      assert(source_attr >= 0 && source_attr < 32);
      max_source_attr = MAX2(max_source_attr, source_attr);
      /* The hardware can only apply source-attribute overrides to the first
       * 16 attributes; the remaining (up to 16) attributes have to be lined
       * up so that the input index equals the output index. We'll need to
       * do some tweaking to make sure that's the case.
       */
      if (input_index < 16)
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      else
         assert(source_attr == input_index);
   }

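   /* VertexURBEntryReadOffset and VertexURBEntryReadLength are expressed in
    * 256-bit units, i.e. pairs of 128-bit VUE slots, hence the
    * divide-by-two for the offset above and the DIV_ROUND_UP for the length
    * below.
    */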
   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

#if GFX_VER >= 8
   dw = anv_batch_emit_dwords(&pipeline->base.batch,
                              GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account the polygon mode, the primitive
 * topology, and the shader stages which might generate their own primitive
 * types.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported TCS output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return pipeline->polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
                            VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      switch (pipeline->line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         return MSRASTMODE_ON_PATTERN;

      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
         return MSRASTMODE_OFF_PIXEL;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      return pipeline->rasterization_samples > 1 ?
             MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
   }
#else
   unreachable("Only on gen7");
#endif
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE] = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE] = 0
};

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful. Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API. Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX100;
         /* The algorithm the HW uses to draw wide lines doesn't quite match
          * what the CTS expects, at least for rectangular lines, so we only
          * enable multisample rasterization for lines narrower than about
          * one pixel; wider lines are drawn as parallelograms instead,
          * which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX100;
      *msaa_rasterization_enable = true;
   }
#else
   unreachable("Invalid call");
#endif
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_input_assembly_state *ia,
              const struct vk_rasterization_state *rs,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 0;
      sf.LineStripListProvokingVertexSelect = 0;
      sf.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 2;
      sf.LineStripListProvokingVertexSelect = 1;
      sf.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

#if GFX_VERx10 == 75
   sf.LineStippleEnable = rs->line.stipple.enable;
#endif

   bool point_from_shader;
   const struct elk_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;

   if (point_from_shader) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GFX_VER >= 8
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations. If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#endif

   raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.ScissorRectangleEnable = true;

#if GFX_VER >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

#if GFX_VER == 7
   /* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (rp != NULL &&
       rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
      assert(vk_format_has_depth(rp->depth_attachment_format));
      enum isl_format isl_format =
         anv_get_isl_format(pipeline->base.device->info,
                            rp->depth_attachment_format,
                            VK_IMAGE_ASPECT_DEPTH_BIT,
                            VK_IMAGE_TILING_OPTIMAL);
      sf.DepthBufferSurfaceFormat =
         isl_format_get_depth_format(isl_format, false);
   }
#endif

#if GFX_VER >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_multisample_state *ms)
{
#if GFX_VER >= 8
   /* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
   genX(emit_multisample)(&pipeline->base.batch,
                          pipeline->rasterization_samples,
                          NULL);
#endif

   /* From the Vulkan 1.0 spec:
    *
    *    "If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments."
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GFX_VER >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif
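   /* Note: the 16-bit figure above is the gfx8+ field width; gfx7's
    * SampleMask field is narrower (8 bits), hence the smaller default.
    */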

   if (ms != NULL)
      sample_mask &= ms->sample_mask;

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = LOGICOP_SET,
};

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS] = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline uint32_t *
write_disabled_blend(uint32_t *state)
{
   struct GENX(BLEND_STATE_ENTRY) entry = {
      .WriteDisableAlpha = true,
      .WriteDisableRed = true,
      .WriteDisableGreen = true,
      .WriteDisableBlue = true,
   };
   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
   return state + GENX(BLEND_STATE_ENTRY_length);
}

static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_color_blend_state *cb,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp)
{
   struct anv_device *device = pipeline->base.device;
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
      .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
      .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *blend_state_start = devinfo->ver >= 8 ?
      pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
   uint32_t *state_pos = blend_state_start;

   state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < MAX_RTS);

      if (cb == NULL || binding->index >= cb->attachment_count) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      const struct vk_color_blend_attachment_state *a =
         &cb->attachments[binding->index];

      VkFormat att_format = rp->color_attachment_formats[binding->index];
      bool ignore_logic_op =
         vk_format_is_float(att_format) || vk_format_is_srgb(att_format);

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
         .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
         .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
         .LogicOpEnable = cb->logic_op_enable && !ignore_logic_op,

         /* Vulkan specification 1.2.168, VkLogicOp:
          *
          *    "Logical operations are controlled by the logicOpEnable and
          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
          *    logicOpEnable is VK_TRUE, then a logical operation selected by
          *    logicOp is applied between each color attachment and the
          *    fragment’s corresponding output value, and blending of all
          *    attachments is treated as if it were disabled."
          *
          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
          * BLEND_STATE_ENTRY:
          *
          *    "Enabling LogicOp and Color Buffer Blending at the same time is
          *    UNDEFINED"
          */
         .ColorBufferBlendEnable = !cb->logic_op_enable && a->blend_enable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_intel_blend[a->src_color_blend_factor],
         .DestinationBlendFactor = vk_to_intel_blend[a->dst_color_blend_factor],
         .ColorBlendFunction = vk_to_intel_blend_op[a->color_blend_op],
         .SourceAlphaBlendFactor = vk_to_intel_blend[a->src_alpha_blend_factor],
         .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dst_alpha_blend_factor],
         .AlphaBlendFunction = vk_to_intel_blend_op[a->alpha_blend_op],
      };

      if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
          a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
          a->color_blend_op != a->alpha_blend_op) {
#if GFX_VER >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      /* The Dual Source Blending documentation says:
       *
       *    "If SRC1 is included in a src/dst blend factor and
       *    a DualSource RT Write message is not used, results
       *    are UNDEFINED. (This reflects the same restriction in DX APIs,
       *    where undefined results are produced if “o1” is not written
       *    by a PS – there are no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation
       * so we just disable the blending to prevent possible issues.
       */
      if (!wm_prog_data->dual_src_blend &&
          (is_dual_src_blend_factor(a->src_color_blend_factor) ||
           is_dual_src_blend_factor(a->dst_color_blend_factor) ||
           is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
           is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
         vk_logw(VK_LOG_OBJS(&device->vk.base),
                 "Enabled dual-src blend factors without writing both targets "
                 "in the shader.  Disabling blending to avoid GPU hangs.");
         entry.ColorBufferBlendEnable = false;
      }

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used. Technically, this means the
       * hardware can do MORE than GL or Vulkan specify. However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op: Vulkan defines MIN as min(src, dst) with no
       * factors applied, whereas the hardware would otherwise compute
       * min(src * srcFactor, dst * dstFactor).
       */
      if (a->color_blend_op == VK_BLEND_OP_MIN ||
          a->color_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alpha_blend_op == VK_BLEND_OP_MIN ||
          a->alpha_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_PS_BLEND) blend = {
      GENX(3DSTATE_PS_BLEND_header),
   };
   blend.AlphaToCoverageEnable = blend_state.AlphaToCoverageEnable;
   blend.ColorBufferBlendEnable = bs0.ColorBufferBlendEnable;
   blend.SourceAlphaBlendFactor = bs0.SourceAlphaBlendFactor;
   blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
   blend.SourceBlendFactor = bs0.SourceBlendFactor;
   blend.DestinationBlendFactor = bs0.DestinationBlendFactor;
   blend.AlphaTestEnable = false;
   blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;

   GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
#endif

   GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
}

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const struct vk_input_assembly_state *ia,
                  const struct vk_viewport_state *vp,
                  const struct vk_rasterization_state *rs)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable = true;
   clip.StatisticsEnable = true;
   clip.EarlyCullEnable = true;
   clip.APIMode = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
   clip.GuardbandClipTestEnable = true;

#if GFX_VER >= 8
   clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
   clip.ClipMode = CLIPMODE_NORMAL;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 2;
      clip.LineStripListProvokingVertexSelect = 1;
      clip.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;
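   /* 0.125 and 255.875 span the representable range of the hardware's
    * point width, which (as far as we know) is a U8.3 fixed-point value.
    */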

   const struct elk_vue_prog_data *last =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with ViewportIndex,
    *    then the first viewport is used."
    */
   if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
      clip.MaximumVPIndex = vp->viewport_count > 0 ?
                            vp->viewport_count - 1 : 0;
   } else {
      clip.MaximumVPIndex = 0;
   }

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with Layer, then the
    *    first layer is used."
    */
   clip.ForceZeroRTAIndexEnable =
      !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GFX_VER == 7
   clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
   clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
   clip.CullMode = genX(vk_to_intel_cullmode)[rs->cull_mode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      wm_prog_data->uses_nonperspective_interp_modes : 0;

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const struct vk_rasterization_state *rs)
{
   const struct elk_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct intel_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

   if (xfb_info) {
      struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
      int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
      int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0};

      memset(so_decl, 0, sizeof(so_decl));

      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         const nir_xfb_output_info *output = &xfb_info->outputs[i];
         unsigned buffer = output->buffer;
         unsigned stream = xfb_info->buffer_to_stream[buffer];

         /* Our hardware is unusual in that it requires us to program
          * SO_DECLs for fake "hole" components, rather than simply taking
          * the offset for each real varying. Each hole can have a size of
          * 1, 2, 3, or 4 dwords; we program as many size = 4 holes as we
          * can, then one final hole for the remaining 1, 2, or 3 dwords.
          */
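         /* For example, a 6-dword gap becomes a 4-dword hole (mask 0xf)
          * followed by a 2-dword hole (mask 0x3).
          */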
         int hole_dwords = (output->offset - next_offset[buffer]) / 4;
         while (hole_dwords > 0) {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = 1,
               .OutputBufferSlot = buffer,
               .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1,
            };
            hole_dwords -= 4;
         }

         int varying = output->location;
         uint8_t component_mask = output->component_mask;
         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
          * - VARYING_SLOT_LAYER                  in VARYING_SLOT_PSIZ.y
          * - VARYING_SLOT_VIEWPORT               in VARYING_SLOT_PSIZ.z
          * - VARYING_SLOT_PSIZ                   in VARYING_SLOT_PSIZ.w
          */
         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
         } else if (varying == VARYING_SLOT_LAYER) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
         } else if (varying == VARYING_SLOT_VIEWPORT) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
         } else if (varying == VARYING_SLOT_PSIZ) {
            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
         }

         next_offset[buffer] = output->offset +
                               __builtin_popcount(component_mask) * 4;

         const int slot = vue_map->varying_to_slot[varying];
         if (slot < 0) {
            /* This can happen if the shader never writes to the varying.
             * Insert a hole instead of actual varying data.
             */
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = true,
               .OutputBufferSlot = buffer,
               .ComponentMask = component_mask,
            };
         } else {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .OutputBufferSlot = buffer,
               .RegisterIndex = slot,
               .ComponentMask = component_mask,
            };
         }
      }

      int max_decls = 0;
      for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
         max_decls = MAX2(max_decls, decls[s]);

      uint8_t sbs[MAX_XFB_STREAMS] = { };
      for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
         if (xfb_info->buffers_written & (1 << b))
            sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
      }

      /* Wa_16011773973:
       * If SOL is enabled and SO_DECL state has to be programmed,
       *    1. Send 3D State SOL state with SOL disabled
       *    2. Send SO_DECL NP state
       *    3. Send 3D State SOL with SOL Enabled
       */
      if (intel_device_info_is_dg2(pipeline->base.device->info))
         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so);

      uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls,
                                     GENX(3DSTATE_SO_DECL_LIST),
                                     .StreamtoBufferSelects0 = sbs[0],
                                     .StreamtoBufferSelects1 = sbs[1],
                                     .StreamtoBufferSelects2 = sbs[2],
                                     .StreamtoBufferSelects3 = sbs[3],
                                     .NumEntries0 = decls[0],
                                     .NumEntries1 = decls[1],
                                     .NumEntries2 = decls[2],
                                     .NumEntries3 = decls[3]);

      for (int i = 0; i < max_decls; i++) {
         GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
            &(struct GENX(SO_DECL_ENTRY)) {
               .Stream0Decl = so_decl[0][i],
               .Stream1Decl = so_decl[1][i],
               .Stream2Decl = so_decl[2][i],
               .Stream3Decl = so_decl[3][i],
            });
      }
   }

#if GFX_VER == 7
#  define streamout_state_dw pipeline->gfx7.streamout_state
#else
#  define streamout_state_dw pipeline->gfx8.streamout_state
#endif

   struct GENX(3DSTATE_STREAMOUT) so = {
      GENX(3DSTATE_STREAMOUT_header),
   };

   if (xfb_info) {
      so.SOFunctionEnable = true;
      so.SOStatisticsEnable = true;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         so.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         so.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

      so.RenderStreamSelect = rs->rasterization_stream;

#if GFX_VER >= 8
      so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
      so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
      so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
      so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
      pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
      pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
      pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
      pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;

      /* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
       * is a bit inconvenient because we don't know what buffers will
       * actually be enabled until draw time.  We do our best here by
       * setting them based on buffers_written and we disable them
       * as-needed at draw time by setting EndAddress = BaseAddress.
       */
      so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
      so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
      so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
      so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif

      int urb_entry_read_offset = 0;
      int urb_entry_read_length =
         (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;

      /* We always read the whole vertex. This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      so.Stream0VertexReadOffset = urb_entry_read_offset;
      so.Stream0VertexReadLength = urb_entry_read_length - 1;
      so.Stream1VertexReadOffset = urb_entry_read_offset;
      so.Stream1VertexReadLength = urb_entry_read_length - 1;
      so.Stream2VertexReadOffset = urb_entry_read_offset;
      so.Stream2VertexReadLength = urb_entry_read_length - 1;
      so.Stream3VertexReadOffset = urb_entry_read_offset;
      so.Stream3VertexReadLength = urb_entry_read_length - 1;
   }

   GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
   uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);

   /* We can potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
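   /* The field counts groups of four, so e.g. 9 samplers yield a prefetch
    * value of 3 (up to 12 samplers) and anything above 16 clamps to 4.
    */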
   return MIN2(count_by_4, 4);
}

static UNUSED struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static UNUSED uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
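   /* PerThreadScratchSpace is a power-of-two encoding where (on these gens)
    * a field value of n selects 1KB << n per thread. For power-of-two sizes,
    * ffs(total_scratch / 2048) works out to log2(total_scratch / 1024), and
    * a total_scratch of zero gives 0 since ffs(0) == 0.
    */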
   return ffs(bin->prog_data->total_scratch / 2048);
}

static void
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
#if GFX_VER >= 8
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
      vs.SingleVertexDispatch = false;
      vs.VectorMaskEnable = false;
      vs.SamplerCount = get_sampler_count(vs_bin);
      vs.BindingTableEntryCount = vs_bin->bind_map.surface_count;
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
   }
}

static void
emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
                      const struct vk_tessellation_state *ts)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct elk_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct elk_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;
      hs.SamplerCount = get_sampler_count(tcs_bin);
      hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count;

      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;

      if (ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         te.OutputTopology = tes_prog_data->output_topology;
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            te.OutputTopology = OUTPUT_TRI_CW;
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            te.OutputTopology = OUTPUT_TRI_CCW;
         } else {
            te.OutputTopology = tes_prog_data->output_topology;
         }
      }

      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      ds.SamplerCount = get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = tes_bin->bind_map.surface_count;
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == INTEL_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
            DISPATCH_MODE_SIMD8_SINGLE_PATCH :
            DISPATCH_MODE_SIMD4X2;

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}

static void
emit_3dstate_gs(struct anv_graphics_pipeline *pipeline,
                const struct vk_rasterization_state *rs)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct elk_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) {
      gs.Enable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      gs.SamplerCount = get_sampler_count(gs_bin);
      gs.BindingTableEntryCount = gs_bin->bind_map.surface_count;
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      if (GFX_VER == 8) {
         /* Broadwell is weird. It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

      gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology = gs_prog_data->output_topology;
      gs.ControlDataFormat = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
      gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         gs.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         gs.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

#if GFX_VER >= 8
      gs.ExpectedVertexCount = gs_prog_data->vertices_in;
      gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
                                   gs_prog_data->static_vertex_count : 0;
#endif

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;
#endif

      gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
   }
}

static bool
state_has_ds_self_dep(const struct vk_graphics_pipeline_state *state)
{
   return state->pipeline_flags &
          VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}

static void
emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                const struct vk_input_assembly_state *ia,
                const struct vk_rasterization_state *rs,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb,
                const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(3DSTATE_WM) wm = {
      GENX(3DSTATE_WM_header),
   };
   wm.StatisticsEnable = true;
   wm.LineEndCapAntialiasingRegionWidth = _05pixels;
   wm.LineAntialiasingRegionWidth = _10pixels;
   wm.PointRasterizationRule = RASTRULE_UPPER_LEFT;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      if (wm_prog_data->early_fragment_tests) {
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      } else if (wm_prog_data->has_side_effects) {
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
      } else {
         wm.EarlyDepthStencilControl = EDSC_NORMAL;
      }

#if GFX_VER >= 8
      /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
       * doesn't take into account KillPixels when no depth or stencil
       * writes are enabled. In order for occlusion queries to work
       * correctly with no attachments, we need to force-enable PS thread
       * dispatch.
       *
       * The BDW docs are pretty clear that this bit isn't validated and
       * probably shouldn't be used in production:
       *
       *    "This must always be set to Normal. This field should not be
       *    tested for functional validation."
       *
       * Unfortunately, however, the other mechanism we have for doing this
       * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
       * Given two bad options, we choose the one which works.
       */
      pipeline->force_fragment_thread_dispatch =
         wm_prog_data->has_side_effects ||
         wm_prog_data->uses_kill;
#endif

      wm.BarycentricInterpolationMode =
         elk_wm_prog_data_barycentric_modes(wm_prog_data, 0);

#if GFX_VER < 8
      wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

      /* If the subpass has a depth or stencil self-dependency, then we
       * need to force the hardware to do the depth/stencil write *after*
       * fragment shader execution. Otherwise, the writes may hit memory
       * before we get around to fetching from the input attachment and we
       * may get the depth or stencil value from the current draw rather
       * than the previous one.
       */
      wm.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill ||
                                 wm_prog_data->uses_omask;

      pipeline->force_fragment_thread_dispatch =
         wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
         wm_prog_data->has_side_effects ||
         wm.PixelShaderKillsPixel;

      if (ms != NULL && ms->rasterization_samples > 1) {
         if (elk_wm_prog_data_is_persample(wm_prog_data, 0)) {
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         } else {
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
         }
      } else {
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }
#endif

      wm.LineStippleEnable = rs->line.stipple.enable;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
   GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
}

static void
emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb)
{
   UNUSED const struct intel_device_info *devinfo =
      pipeline->base.device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 7
         /* Even if no fragments are ever dispatched, gfx7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VER < 8
   /* The hardware wedges if you have this bit set but don't turn on any
    * dual source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         const struct vk_color_blend_attachment_state *a =
            &cb->attachments[i];

         if (a->blend_enable &&
             (is_dual_src_blend_factor(a->src_color_blend_factor) ||
              is_dual_src_blend_factor(a->dst_color_blend_factor) ||
              is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
              is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
#endif

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  ms != NULL ? ms->rasterization_samples : 1,
                                  0 /* msaa_flags */);

      ps.KernelStartPointer0 = fs_bin->kernel.offset +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
         elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = GFX_VER >= 8 &&
                            wm_prog_data->uses_vmask;
      ps.SamplerCount = get_sampler_count(fs_bin);
      ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                                  POSOFFSET_SAMPLE : POSOFFSET_NONE;
#if GFX_VER < 8
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable = dual_src_blend;
#endif

#if GFX_VERx10 == 75
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask = 0xff;
#endif

#if GFX_VER >= 8
      ps.MaximumNumberofThreadsPerPSD =
         devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#else
      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
   }
}

#if GFX_VER >= 8
static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
                      const struct vk_rasterization_state *rs,
                      const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample =
         elk_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution. Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill;

      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
   }
}
#endif

static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
                   const struct vk_multisample_state *ms,
                   const struct vk_graphics_pipeline_state *state)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the check for whether to enable
    * the PMA fix on gfx8 or gfx9. It's given by this chunk of the giant
    * formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      state_has_ds_self_dep(state) ||
      wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms && ms->alpha_to_coverage_enable);
}

void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
                             const struct vk_graphics_pipeline_state *state)
{
   enum intel_urb_deref_block_size urb_deref_block_size;
   emit_urb_setup(pipeline, &urb_deref_block_size);

   assert(state->rs != NULL);
   emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
                 urb_deref_block_size);
   emit_ms_state(pipeline, state->ms);
   emit_cb_state(pipeline, state->cb, state->ms, state->rp);
   compute_kill_pixel(pipeline, state->ms, state);

   emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);

#if 0
   /* From gfx7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *    "Note: Because of corruption in IVB:GT2, software needs to flush the
    *    whole fixed function pipeline when the GS enable changes value in
    *    the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the
    * "CS Stall" bit set.
    */
   if (device->info->platform == INTEL_PLATFORM_IVB)
      gfx7_emit_vs_workaround_flush(elk);
#endif

   emit_vertex_input(pipeline, state->vi);

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline, state->ts);
   emit_3dstate_gs(pipeline, state->rs);

   emit_3dstate_vf_statistics(pipeline);

   emit_3dstate_streamout(pipeline, state->rs);

   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, state->ia, state->rs,
                   state->ms, state->cb, state);
   emit_3dstate_ps(pipeline, state->ms, state->cb);
#if GFX_VER >= 8
   emit_3dstate_ps_extra(pipeline, state->rs, state);
#endif
}

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
{
   struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = device->info;
   const struct elk_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(&pipeline->base,
                                cs_prog_data->base.total_shared > 0);

   const struct intel_cs_dispatch_info dispatch =
      elk_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
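   /* The CURBE allocation below is rounded up to an even number of
    * registers. A worked example with illustrative numbers (not taken from
    * any real shader): 3 per-thread regs at 8 dispatched threads plus 1
    * cross-thread reg gives 3 * 8 + 1 = 25, which ALIGN(25, 2) rounds to 26.
    */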
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
            cs_prog_data->push.cross_thread.regs, 2);

   const struct anv_shader_bin *cs_bin = pipeline->cs;

   anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7
      vfe.StackSize = 0;
#else
      vfe.GPGPUMode = true;
#endif
      vfe.MaximumNumberofThreads =
         devinfo->max_cs_threads * devinfo->subslice_total - 1;
      vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
      vfe.ResetGatewayTimer = true;
      vfe.BypassGatewayControl = true;
      vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
      vfe.CURBEAllocationSize = vfe_curbe_allocation;

      if (cs_bin->prog_data->total_scratch) {
         if (GFX_VER >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
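            /* Worked example (value illustrative): total_scratch = 4096
             * gives ffs(4096) - 11 = 13 - 11 = 2, the "4k" encoding.
             */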
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 11;
         } else if (GFX_VERx10 == 75) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
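            /* Worked example (value illustrative): total_scratch = 8192
             * gives ffs(8192) - 12 = 14 - 12 = 2, the "8k" encoding.
             */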
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 12;
         } else {
            /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
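            /* Worked example (value illustrative): total_scratch = 12288
             * gives 12288 / 1024 - 1 = 11, the 12kB encoding.
             */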
            vfe.PerThreadScratchSpace =
               cs_bin->prog_data->total_scratch / 1024 - 1;
         }
         vfe.ScratchSpaceBasePointer =
            get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
      }
   }

   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
      .KernelStartPointer =
         cs_bin->kernel.offset +
         elk_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
      .SamplerCount = get_sampler_count(cs_bin),
      /* We add 1 because the CS indirect parameters buffer isn't accounted
       * for in bind_map.surface_count.
       */
      .BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
      .BarrierEnable = cs_prog_data->uses_barrier,
      .SharedLocalMemorySize =
         intel_compute_slm_encode_size(GFX_VER, cs_prog_data->base.total_shared),

#if GFX_VERx10 != 75
      .ConstantURBEntryReadOffset = 0,
#endif
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GFX_VERx10 >= 75
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif

      .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
   };
   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                        pipeline->interface_descriptor_data,
                                        &desc);
}