/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "common/intel_genX_state_elk.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From the Broadwell spec, command reference structures, page 586:
    *    "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *    64-bit components are stored in the URB without any conversion. In
    *    this case, vertex elements must be written as 128 or 256 bits, with
    *    VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *    R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *    Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *    set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *    Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *    a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *    Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *    256-bit vertex element."
    */
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware restrictions
       * (see comment above) */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const struct vk_vertex_input_state *vi)
{
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

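   /* Attributes that span two input slots (dvec3/dvec4) have both of their
    * slot bits set in double_inputs_read but are fed by a single
    * VERTEX_ELEMENT, so every pair of double bits costs one element less
    * than its popcount suggests.
    */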
   uint32_t elem_count = __builtin_popcount(elements) -
      __builtin_popcount(elements_double) / 2;

   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->base.batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zeros.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   u_foreach_bit(a, vi->attributes_valid) {
      enum isl_format format = anv_get_isl_format(pipeline->base.device->info,
                                                  vi->attributes[a].format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);
      assume(format < ISL_NUM_FORMATS);

      uint32_t binding = vi->attributes[a].binding;
      assert(binding < MAX_VBS);

      if ((elements & (1 << a)) == 0)
         continue; /* Binding unused */

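      /* Mirror the elem_count math above: skip one element slot for every
       * pair of double bits below this attribute.
       */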
      uint32_t slot =
         __builtin_popcount(elements & ((1 << a) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                         ((1 << a) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = vi->attributes[a].binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = vi->attributes[a].offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GFX_VER >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         bool per_instance = pipeline->vb[binding].instanced;
         uint32_t divisor = pipeline->vb[binding].instance_divisor *
                            pipeline->instance_multiplier;

         vfi.InstancingEnable = per_instance;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate = per_instance ? divisor : 1;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well.  Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GFX_VER >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = id_slot;
      }
#endif
   }

#if GFX_VER >= 8
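   /* On gfx8+, VF_SGVS injects VertexID/InstanceID into components 2 and 3
    * of the SVGS element, matching the VFCOMP_STORE_VID/IID layout used on
    * gfx7 above.
    */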
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber = 2;
      sgvs.VertexIDElementOffset = id_slot;
      sgvs.InstanceIDEnable = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber = 3;
      sgvs.InstanceIDElementOffset = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = device->info;
   struct intel_urb_config urb_cfg = {
      .size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
   };

   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        &urb_cfg, deref_block_size, &constrained);

#if GFX_VERx10 == 70
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address = device->workaround_address;
   }
#endif

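   /* 3DSTATE_URB_VS/HS/DS/GS differ only in their command sub-opcode, so a
    * single loop can program all four stages by bumping the sub-opcode.
    */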
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode += i;
         urb.VSURBStartingAddress = urb_cfg.start[i];
         urb.VSURBEntryAllocationSize = urb_cfg.size[i] - 1;
         urb.VSNumberofURBEntries = urb_cfg.entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct elk_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct elk_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GFX_VER >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   const struct intel_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   int first_slot = elk_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                        fs_input_map);
   assert(first_slot % 2 == 0);
   unsigned urb_entry_read_offset = first_slot / 2;
   int max_source_attr = 0;
   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
      int input_index = wm_prog_data->urb_setup[attr];

      assert(0 <= input_index);

      /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
       * VUE header
       */
      if (attr == VARYING_SLOT_VIEWPORT ||
          attr == VARYING_SLOT_LAYER ||
          attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it.  It could be a regular varying
          * read by the fragment shader but not written by the vertex shader,
          * or it could be gl_PrimitiveID.  In the first case the value is
          * undefined; in the second it needs to be gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
         continue;
      }

      /* We have to subtract two slots to account for the URB entry output
       * read offset in the VS and GS stages.
       */
      const int source_attr = slot - 2 * urb_entry_read_offset;
      assert(source_attr >= 0 && source_attr < 32);
      max_source_attr = MAX2(max_source_attr, source_attr);
      /* The hardware can only apply overrides to the first 16 attributes;
       * the remaining (up to 16) have to be lined up so that the input index
       * equals the output index.  We'll need to do some tweaking to make
       * sure that's the case.
       */
      if (input_index < 16)
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      else
         assert(source_attr == input_index);
   }

   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
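   /* The read length is in pairs of VUE slots (256-bit URB rows), hence the
    * round-up divide by two.
    */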
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

#if GFX_VER >= 8
   dw = anv_batch_emit_dwords(&pipeline->base.batch,
                              GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported TES output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return pipeline->polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
                            VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      switch (pipeline->line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         return MSRASTMODE_ON_PATTERN;

      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
         return MSRASTMODE_OFF_PIXEL;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      return pipeline->rasterization_samples > 1 ?
             MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
   }
#else
   unreachable("Only on gen7");
#endif
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE]           = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]      = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]       = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL]  = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]  = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE]         = 0
};

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful.  Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API.  Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX100;
         /* The algorithm the HW uses to draw wide lines doesn't quite match
          * what the CTS expects, at least for rectangular lines, so we set
          * this to false here, making it draw parallelograms instead, which
          * work well enough.
          */
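         /* 1.0078125 is 1.0 + 1/128, presumably one step of the hardware's
          * 7-bit fractional line-width precision above 1.0, so anything
          * wider is treated as a wide line.
          */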
         *msaa_rasterization_enable = line_width < 1.0078125;
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX100;
      *msaa_rasterization_enable = true;
   }
#else
   unreachable("Invalid call");
#endif
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_input_assembly_state *ia,
              const struct vk_rasterization_state *rs,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 0;
      sf.LineStripListProvokingVertexSelect = 0;
      sf.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 2;
      sf.LineStripListProvokingVertexSelect = 1;
      sf.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

#if GFX_VERx10 == 75
   sf.LineStippleEnable = rs->line.stipple.enable;
#endif

   bool point_from_shader;
   const struct elk_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;

   if (point_from_shader) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GFX_VER >= 8
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations.  If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#endif

   raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.ScissorRectangleEnable = true;

#if GFX_VER >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

#if GFX_VER == 7
   /* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (rp != NULL &&
       rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
      assert(vk_format_has_depth(rp->depth_attachment_format));
      enum isl_format isl_format =
         anv_get_isl_format(pipeline->base.device->info,
                            rp->depth_attachment_format,
                            VK_IMAGE_ASPECT_DEPTH_BIT,
                            VK_IMAGE_TILING_OPTIMAL);
      sf.DepthBufferSurfaceFormat =
         isl_format_get_depth_format(isl_format, false);
   }
#endif

#if GFX_VER >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_multisample_state *ms)
{
#if GFX_VER >= 8
   /* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
   genX(emit_multisample)(&pipeline->base.batch,
                          pipeline->rasterization_samples,
                          NULL);
#endif

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
#if GFX_VER >= 8
   uint32_t sample_mask = 0xffff;
#else
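   /* On gfx7 the SampleMask field is only 8 bits wide. */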
   uint32_t sample_mask = 0xff;
#endif

   if (ms != NULL)
      sample_mask &= ms->sample_mask;

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY]          = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]         = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]           = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]   = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]  = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]         = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]           = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]            = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]           = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]    = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]        = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]    = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]   = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]          = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]           = LOGICOP_SET,
};

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                     = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                      = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]                = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]      = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]                = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]      = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]                = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]      = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]                = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]      = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]           = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]           = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]       = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]               = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]     = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]               = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]     = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD]              = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]         = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]              = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]              = BLENDFUNCTION_MAX,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER]            = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS]             = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL]            = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]    = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER]          = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL]        = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS]           = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP]                = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]             = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]              = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]  = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]  = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline uint32_t *
write_disabled_blend(uint32_t *state)
{
   struct GENX(BLEND_STATE_ENTRY) entry = {
      .WriteDisableAlpha = true,
      .WriteDisableRed = true,
      .WriteDisableGreen = true,
      .WriteDisableBlue = true,
   };
   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
   return state + GENX(BLEND_STATE_ENTRY_length);
}

static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_color_blend_state *cb,
              const struct vk_multisample_state *ms)
{
   struct anv_device *device = pipeline->base.device;
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
      .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
      .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *blend_state_start = devinfo->ver >= 8 ?
      pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
   uint32_t *state_pos = blend_state_start;

   state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < MAX_RTS);

      if (cb == NULL || binding->index >= cb->attachment_count) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      const struct vk_color_blend_attachment_state *a =
         &cb->attachments[binding->index];

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
         .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
         .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
         .LogicOpEnable = cb->logic_op_enable,

         /* Vulkan specification 1.2.168, VkLogicOp:
          *
          *    "Logical operations are controlled by the logicOpEnable and
          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
          *    logicOpEnable is VK_TRUE, then a logical operation selected by
          *    logicOp is applied between each color attachment and the
          *    fragment’s corresponding output value, and blending of all
          *    attachments is treated as if it were disabled."
          *
          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
          * BLEND_STATE_ENTRY:
          *
          *    "Enabling LogicOp and Color Buffer Blending at the same time is
          *    UNDEFINED"
          */
         .ColorBufferBlendEnable = !cb->logic_op_enable && a->blend_enable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_intel_blend[a->src_color_blend_factor],
         .DestinationBlendFactor = vk_to_intel_blend[a->dst_color_blend_factor],
         .ColorBlendFunction = vk_to_intel_blend_op[a->color_blend_op],
         .SourceAlphaBlendFactor = vk_to_intel_blend[a->src_alpha_blend_factor],
         .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dst_alpha_blend_factor],
         .AlphaBlendFunction = vk_to_intel_blend_op[a->alpha_blend_op],
      };

      if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
          a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
          a->color_blend_op != a->alpha_blend_op) {
#if GFX_VER >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      /* The Dual Source Blending documentation says:
       *
       *    "If SRC1 is included in a src/dst blend factor and
       *    a DualSource RT Write message is not used, results
       *    are UNDEFINED. (This reflects the same restriction in DX APIs,
       *    where undefined results are produced if “o1” is not written
       *    by a PS – there are no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation
       * so we just disable the blending to prevent possible issues.
       */
      if (!wm_prog_data->dual_src_blend &&
          (is_dual_src_blend_factor(a->src_color_blend_factor) ||
           is_dual_src_blend_factor(a->dst_color_blend_factor) ||
           is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
           is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
         vk_logw(VK_LOG_OBJS(&device->vk.base),
                 "Enabled dual-src blend factors without writing both targets "
                 "in the shader.  Disabling blending to avoid GPU hangs.");
         entry.ColorBufferBlendEnable = false;
      }

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used.  Technically, this means the
       * hardware can do MORE than GL or Vulkan specify.  However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->color_blend_op == VK_BLEND_OP_MIN ||
          a->color_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alpha_blend_op == VK_BLEND_OP_MIN ||
          a->alpha_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_PS_BLEND) blend = {
      GENX(3DSTATE_PS_BLEND_header),
   };
   blend.AlphaToCoverageEnable       = blend_state.AlphaToCoverageEnable;
   blend.ColorBufferBlendEnable      = bs0.ColorBufferBlendEnable;
   blend.SourceAlphaBlendFactor      = bs0.SourceAlphaBlendFactor;
   blend.DestinationAlphaBlendFactor = bs0.DestinationAlphaBlendFactor;
   blend.SourceBlendFactor           = bs0.SourceBlendFactor;
   blend.DestinationBlendFactor      = bs0.DestinationBlendFactor;
   blend.AlphaTestEnable             = false;
   blend.IndependentAlphaBlendEnable = blend_state.IndependentAlphaBlendEnable;

   GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
#endif

   GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
}

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const struct vk_input_assembly_state *ia,
                  const struct vk_viewport_state *vp,
                  const struct vk_rasterization_state *rs)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable = true;
   clip.StatisticsEnable = true;
   clip.EarlyCullEnable = true;
   clip.APIMode = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
   clip.GuardbandClipTestEnable = true;

#if GFX_VER >= 8
   clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
   clip.ClipMode = CLIPMODE_NORMAL;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 2;
      clip.LineStripListProvokingVertexSelect = 1;
      clip.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;

   const struct elk_vue_prog_data *last =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with ViewportIndex,
    *    then the first viewport is used."
    */
   if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
      clip.MaximumVPIndex = vp->viewport_count > 0 ?
                            vp->viewport_count - 1 : 0;
   } else {
      clip.MaximumVPIndex = 0;
   }

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with Layer, then the
    *    first layer is used."
    */
   clip.ForceZeroRTAIndexEnable =
      !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GFX_VER == 7
   clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
   clip.FrontWinding = genX(vk_to_intel_front_face)[rs->front_face];
   clip.CullMode = genX(vk_to_intel_cullmode)[rs->cull_mode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      wm_prog_data->uses_nonperspective_interp_modes : 0;

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const struct vk_rasterization_state *rs)
{
   const struct elk_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct intel_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

   if (xfb_info) {
      struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
      int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
      int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0};

      memset(so_decl, 0, sizeof(so_decl));

      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         const nir_xfb_output_info *output = &xfb_info->outputs[i];
         unsigned buffer = output->buffer;
         unsigned stream = xfb_info->buffer_to_stream[buffer];

         /* Our hardware is unusual in that it requires us to program SO_DECLs
          * for fake "hole" components, rather than simply taking the offset
          * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
          * program as many size = 4 holes as we can, then a final hole to
          * accommodate the final 1, 2, or 3 remaining.
          */
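         /* E.g. a 7-dword hole is emitted as two SO_DECLs with component
          * masks 0xf and then 0x7.
          */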
         int hole_dwords = (output->offset - next_offset[buffer]) / 4;
         while (hole_dwords > 0) {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = 1,
               .OutputBufferSlot = buffer,
               .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1,
            };
            hole_dwords -= 4;
         }

         int varying = output->location;
         uint8_t component_mask = output->component_mask;
         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
          * - VARYING_SLOT_LAYER                  in VARYING_SLOT_PSIZ.y
          * - VARYING_SLOT_VIEWPORT               in VARYING_SLOT_PSIZ.z
          * - VARYING_SLOT_PSIZ                   in VARYING_SLOT_PSIZ.w
          */
         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
         } else if (varying == VARYING_SLOT_LAYER) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
         } else if (varying == VARYING_SLOT_VIEWPORT) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
         } else if (varying == VARYING_SLOT_PSIZ) {
            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
         }

         next_offset[buffer] = output->offset +
                               __builtin_popcount(component_mask) * 4;

         const int slot = vue_map->varying_to_slot[varying];
         if (slot < 0) {
            /* This can happen if the shader never writes to the varying.
             * Insert a hole instead of actual varying data.
             */
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = true,
               .OutputBufferSlot = buffer,
               .ComponentMask = component_mask,
            };
         } else {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .OutputBufferSlot = buffer,
               .RegisterIndex = slot,
               .ComponentMask = component_mask,
            };
         }
      }

      int max_decls = 0;
      for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
         max_decls = MAX2(max_decls, decls[s]);

      uint8_t sbs[MAX_XFB_STREAMS] = { };
      for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
         if (xfb_info->buffers_written & (1 << b))
            sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
      }

      /* Wa_16011773973:
       * If SOL is enabled and SO_DECL state has to be programmed,
       *    1. Send 3D State SOL state with SOL disabled
       *    2. Send SO_DECL NP state
       *    3. Send 3D State SOL with SOL Enabled
       */
      if (intel_device_info_is_dg2(pipeline->base.device->info))
         anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so);

      uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls,
                                     GENX(3DSTATE_SO_DECL_LIST),
                                     .StreamtoBufferSelects0 = sbs[0],
                                     .StreamtoBufferSelects1 = sbs[1],
                                     .StreamtoBufferSelects2 = sbs[2],
                                     .StreamtoBufferSelects3 = sbs[3],
                                     .NumEntries0 = decls[0],
                                     .NumEntries1 = decls[1],
                                     .NumEntries2 = decls[2],
                                     .NumEntries3 = decls[3]);

      for (int i = 0; i < max_decls; i++) {
         GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
                                  &(struct GENX(SO_DECL_ENTRY)) {
                                     .Stream0Decl = so_decl[0][i],
                                     .Stream1Decl = so_decl[1][i],
                                     .Stream2Decl = so_decl[2][i],
                                     .Stream3Decl = so_decl[3][i],
                                  });
      }
   }

#if GFX_VER == 7
#  define streamout_state_dw pipeline->gfx7.streamout_state
#else
#  define streamout_state_dw pipeline->gfx8.streamout_state
#endif

   struct GENX(3DSTATE_STREAMOUT) so = {
      GENX(3DSTATE_STREAMOUT_header),
   };

   if (xfb_info) {
      so.SOFunctionEnable = true;
      so.SOStatisticsEnable = true;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         so.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         so.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

      so.RenderStreamSelect = rs->rasterization_stream;

#if GFX_VER >= 8
      so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
      so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
      so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
      so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
#else
      pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
      pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
      pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
      pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;

      /* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
       * is a bit inconvenient because we don't know what buffers will
       * actually be enabled until draw time.  We do our best here by
       * setting them based on buffers_written and we disable them
       * as-needed at draw time by setting EndAddress = BaseAddress.
       */
      so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
      so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
      so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
      so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
#endif

      int urb_entry_read_offset = 0;
      int urb_entry_read_length =
         (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;

      /* We always read the whole vertex.  This could be reduced at some
       * point by reading less and offsetting the register index in the
       * SO_DECLs.
       */
      so.Stream0VertexReadOffset = urb_entry_read_offset;
      so.Stream0VertexReadLength = urb_entry_read_length - 1;
      so.Stream1VertexReadOffset = urb_entry_read_offset;
      so.Stream1VertexReadLength = urb_entry_read_length - 1;
      so.Stream2VertexReadOffset = urb_entry_read_offset;
      so.Stream2VertexReadLength = urb_entry_read_length - 1;
      so.Stream3VertexReadOffset = urb_entry_read_offset;
      so.Stream3VertexReadLength = urb_entry_read_length - 1;
   }

   GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
   uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);

   /* We can potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
   return MIN2(count_by_4, 4);
}

static UNUSED struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static UNUSED uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
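   /* The PerThreadScratchSpace field is a power-of-two encoding where value
    * n selects 2^n KB per thread; for power-of-two sizes, ffs(size / 2048)
    * computes log2(size / 1024), so 1KB encodes as 0, 2KB as 1, and so on
    * (our reading of the encoding).
    */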
   return ffs(bin->prog_data->total_scratch / 2048);
}

static void
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) {
      vs.Enable = true;
      vs.StatisticsEnable = true;
      vs.KernelStartPointer = vs_bin->kernel.offset;
#if GFX_VER >= 8
      vs.SIMD8DispatchEnable =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
      vs.SingleVertexDispatch = false;
      vs.VectorMaskEnable = false;
      vs.SamplerCount = get_sampler_count(vs_bin);
      vs.BindingTableEntryCount = vs_bin->bind_map.surface_count;
      vs.FloatingPointMode = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable = false;
      vs.MaximumNumberofThreads = devinfo->max_vs_threads - 1;

      vs.VertexURBEntryReadLength = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
   }
}

static void
emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
                      const struct vk_tessellation_state *ts)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct elk_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct elk_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;
      hs.SamplerCount = get_sampler_count(tcs_bin);
      hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count;

      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;

      if (ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         te.OutputTopology = tes_prog_data->output_topology;
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            te.OutputTopology = OUTPUT_TRI_CW;
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            te.OutputTopology = OUTPUT_TRI_CCW;
         } else {
            te.OutputTopology = tes_prog_data->output_topology;
         }
      }

      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      ds.SamplerCount = get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = tes_bin->bind_map.surface_count;
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == INTEL_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
         DISPATCH_MODE_SIMD8_SINGLE_PATCH :
         DISPATCH_MODE_SIMD4X2;

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}

static void
emit_3dstate_gs(struct anv_graphics_pipeline *pipeline,
                const struct vk_rasterization_state *rs)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct elk_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) {
      gs.Enable = true;
      gs.StatisticsEnable = true;
      gs.KernelStartPointer = gs_bin->kernel.offset;
      gs.DispatchMode = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow = false;
      gs.VectorMaskEnable = false;
      gs.SamplerCount = get_sampler_count(gs_bin);
      gs.BindingTableEntryCount = gs_bin->bind_map.surface_count;
      gs.IncludeVertexHandles = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID = gs_prog_data->include_primitive_id;

      if (GFX_VER == 8) {
         /* Broadwell is weird.  It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

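      /* OutputVertexSize counts 16B units minus one, while the compiler
       * reports the vertex size in 32B hwords; hence the "* 2 - 1"
       * (our reading of the encoding).
       */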
1500 gs.OutputVertexSize = gs_prog_data->output_vertex_size_hwords * 2 - 1;
1501 gs.OutputTopology = gs_prog_data->output_topology;
1502 gs.ControlDataFormat = gs_prog_data->control_data_format;
1503 gs.ControlDataHeaderSize = gs_prog_data->control_data_header_size_hwords;
1504 gs.InstanceControl = MAX2(gs_prog_data->invocations, 1) - 1;
1505
1506 switch (rs->provoking_vertex) {
1507 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
1508 gs.ReorderMode = LEADING;
1509 break;
1510
1511 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
1512 gs.ReorderMode = TRAILING;
1513 break;
1514
1515 default:
1516 unreachable("Invalid provoking vertex mode");
1517 }
1518
1519 #if GFX_VER >= 8
1520 gs.ExpectedVertexCount = gs_prog_data->vertices_in;
1521 gs.StaticOutput = gs_prog_data->static_vertex_count >= 0;
1522 gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
1523 gs_prog_data->static_vertex_count : 0;
1524 #endif
1525
1526 gs.VertexURBEntryReadOffset = 0;
1527 gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
1528 gs.DispatchGRFStartRegisterForURBData =
1529 gs_prog_data->base.base.dispatch_grf_start_reg;
1530
1531 #if GFX_VER >= 8
1532 gs.UserClipDistanceClipTestEnableBitmask =
1533 gs_prog_data->base.clip_distance_mask;
1534 gs.UserClipDistanceCullTestEnableBitmask =
1535 gs_prog_data->base.cull_distance_mask;
1536 #endif
1537
1538 gs.PerThreadScratchSpace = get_scratch_space(gs_bin);
1539 gs.ScratchSpaceBasePointer =
1540 get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
1541 }
1542 }
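
/* Illustrative only: the VK_EXT_provoking_vertex to hardware ReorderMode
 * mapping used above, as a hypothetical standalone helper (not part of the
 * driver); LEADING and TRAILING are the genxml enum names.
 */
#if 0
static uint32_t
reorder_mode_for_provoking_vertex(VkProvokingVertexModeEXT mode)
{
   switch (mode) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: return LEADING;
   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:  return TRAILING;
   default: unreachable("Invalid provoking vertex mode");
   }
}
#endif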

static bool
state_has_ds_self_dep(const struct vk_graphics_pipeline_state *state)
{
   return state->pipeline_flags &
          VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}
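
/* Context, illustrative only: this bit reaches pipeline_flags when an
 * application creates the pipeline with the feedback-loop create flag from
 * VK_EXT_attachment_feedback_loop_layout. A minimal app-side sketch, with
 * every other field elided:
 */
#if 0
VkGraphicsPipelineCreateInfo info = {
   .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
   .flags = VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT,
   /* ... */
};
#endif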

static void
emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                const struct vk_input_assembly_state *ia,
                const struct vk_rasterization_state *rs,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb,
                const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(3DSTATE_WM) wm = {
      GENX(3DSTATE_WM_header),
   };
   wm.StatisticsEnable = true;
   wm.LineEndCapAntialiasingRegionWidth = _05pixels;
   wm.LineAntialiasingRegionWidth = _10pixels;
   wm.PointRasterizationRule = RASTRULE_UPPER_LEFT;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      if (wm_prog_data->early_fragment_tests) {
         wm.EarlyDepthStencilControl = EDSC_PREPS;
      } else if (wm_prog_data->has_side_effects) {
         wm.EarlyDepthStencilControl = EDSC_PSEXEC;
      } else {
         wm.EarlyDepthStencilControl = EDSC_NORMAL;
      }

#if GFX_VER >= 8
      /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
       * doesn't take into account KillPixels when no depth or stencil
       * writes are enabled. In order for occlusion queries to work
       * correctly with no attachments, we need to force-enable PS thread
       * dispatch.
       *
       * The BDW docs are pretty clear that this bit isn't validated and
       * probably shouldn't be used in production:
       *
       *    "This must always be set to Normal. This field should not be
       *     tested for functional validation."
       *
       * Unfortunately, however, the other mechanism we have for doing this
       * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
       * Given two bad options, we choose the one which works.
       */
      pipeline->force_fragment_thread_dispatch =
         wm_prog_data->has_side_effects ||
         wm_prog_data->uses_kill;
#endif

      wm.BarycentricInterpolationMode =
         elk_wm_prog_data_barycentric_modes(wm_prog_data, 0);

#if GFX_VER < 8
      wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

      /* If the subpass has a depth or stencil self-dependency, then we
       * need to force the hardware to do the depth/stencil write *after*
       * fragment shader execution. Otherwise, the writes may hit memory
       * before we get around to fetching from the input attachment and we
       * may get the depth or stencil value from the current draw rather
       * than the previous one.
       */
      wm.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill ||
                                 wm_prog_data->uses_omask;

      pipeline->force_fragment_thread_dispatch =
         wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
         wm_prog_data->has_side_effects ||
         wm.PixelShaderKillsPixel;

      if (ms != NULL && ms->rasterization_samples > 1) {
         if (elk_wm_prog_data_is_persample(wm_prog_data, 0)) {
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         } else {
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
         }
      } else {
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }
#endif

      wm.LineStippleEnable = rs->line.stipple.enable;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
   GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
}
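
/* Illustrative only: the pre-gfx8 MultisampleDispatchMode selection above,
 * as a hypothetical helper (msdispmode is not part of the driver). The
 * single-sampled case also uses PERSAMPLE, which is equivalent there since
 * each pixel has exactly one sample.
 */
#if 0
static uint32_t
msdispmode(const struct elk_wm_prog_data *wm_prog_data,
           const struct vk_multisample_state *ms)
{
   if (ms != NULL && ms->rasterization_samples > 1 &&
       !elk_wm_prog_data_is_persample(wm_prog_data, 0))
      return MSDISPMODE_PERPIXEL;
   return MSDISPMODE_PERSAMPLE;
}
#endif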

static void
emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb)
{
   UNUSED const struct intel_device_info *devinfo =
      pipeline->base.device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 7
         /* Even if no fragments are ever dispatched, gfx7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VER < 8
   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         const struct vk_color_blend_attachment_state *a =
            &cb->attachments[i];

         if (a->blend_enable &&
             (is_dual_src_blend_factor(a->src_color_blend_factor) ||
              is_dual_src_blend_factor(a->dst_color_blend_factor) ||
              is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
              is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
#endif

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  ms != NULL ? ms->rasterization_samples : 1,
                                  0 /* msaa_flags */);

      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow = false;
      ps.VectorMaskEnable = GFX_VER >= 8 &&
                            wm_prog_data->uses_vmask;
      ps.SamplerCount = get_sampler_count(fs_bin);
      ps.BindingTableEntryCount = fs_bin->bind_map.surface_count;
      ps.PushConstantEnable = wm_prog_data->base.nr_params > 0 ||
                              wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect = wm_prog_data->uses_pos_offset ?
                                  POSOFFSET_SAMPLE : POSOFFSET_NONE;
#if GFX_VER < 8
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable = dual_src_blend;
#endif

#if GFX_VERx10 == 75
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask = 0xff;
#endif

#if GFX_VER >= 8
      ps.MaximumNumberofThreadsPerPSD =
         devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#else
      ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.PerThreadScratchSpace = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
   }
}
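
/* Illustrative only: is_dual_src_blend_factor() is defined earlier in this
 * file. A sketch consistent with its use above would test for the four
 * SRC1-based Vulkan blend factors, which are exactly the factors that
 * require dual-source blending:
 */
#if 0
static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}
#endif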

#if GFX_VER >= 8
static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
                      const struct vk_rasterization_state *rs,
                      const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid = true;
      ps.AttributeEnable = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample =
         elk_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution. Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel = state_has_ds_self_dep(state) ||
                                 wm_prog_data->uses_kill;

      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
   }
}
#endif

static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
                   const struct vk_multisample_state *ms,
                   const struct vk_graphics_pipeline_state *state)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gfx8 or gfx9. It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      state_has_ds_self_dep(state) ||
      wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms && ms->alpha_to_coverage_enable);
}
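
/* Worked reduction, illustrative only: with ChromaKeyKillEnable and
 * AlphaTestEnable pinned to false, the hardware formula above collapses
 * term by term into the expression assigned to pipeline->kill_pixel:
 *
 *    PixelShaderKillsPixels        -> ds self-dep || uses_kill
 *    oMask Present to RenderTarget -> uses_omask
 *    AlphaToCoverageEnable         -> ms && ms->alpha_to_coverage_enable
 *    AlphaTestEnable               -> false
 *    ChromaKeyKillEnable           -> false
 */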

void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
                             const struct vk_graphics_pipeline_state *state)
{
   enum intel_urb_deref_block_size urb_deref_block_size;
   emit_urb_setup(pipeline, &urb_deref_block_size);

   assert(state->rs != NULL);
   emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
                 urb_deref_block_size);
   emit_ms_state(pipeline, state->ms);
   emit_cb_state(pipeline, state->cb, state->ms);
   compute_kill_pixel(pipeline, state->ms, state);

   emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);

#if 0
   /* From gfx7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *    "Note: Because of corruption in IVB:GT2, software needs to flush the
    *     whole fixed function pipeline when the GS enable changes value in
    *     the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (device->info->platform == INTEL_PLATFORM_IVB)
      gfx7_emit_vs_workaround_flush(elk);
#endif

   emit_vertex_input(pipeline, state->vi);

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline, state->ts);
   emit_3dstate_gs(pipeline, state->rs);

   emit_3dstate_vf_statistics(pipeline);

   emit_3dstate_streamout(pipeline, state->rs);

   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, state->ia, state->rs,
                   state->ms, state->cb, state);
   emit_3dstate_ps(pipeline, state->ms, state->cb);
#if GFX_VER >= 8
   emit_3dstate_ps_extra(pipeline, state->rs, state);
#endif
}

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
{
   struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = device->info;
   const struct elk_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);

   const struct intel_cs_dispatch_info dispatch =
      elk_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
            cs_prog_data->push.cross_thread.regs, 2);
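   /* Worked example with hypothetical numbers: 4 per-thread push registers
    * across 8 dispatched threads plus 1 cross-thread register gives
    * 4 * 8 + 1 = 33 registers, which ALIGN(33, 2) rounds up to 34, keeping
    * the CURBE allocation at an even register count.
    */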

   const struct anv_shader_bin *cs_bin = pipeline->cs;

   anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7
      vfe.StackSize = 0;
#else
      vfe.GPGPUMode = true;
#endif
      vfe.MaximumNumberofThreads =
         devinfo->max_cs_threads * devinfo->subslice_total - 1;
      vfe.NumberofURBEntries = GFX_VER <= 7 ? 0 : 2;
      vfe.ResetGatewayTimer = true;
      vfe.BypassGatewayControl = true;
      vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
      vfe.CURBEAllocationSize = vfe_curbe_allocation;

      if (cs_bin->prog_data->total_scratch) {
         if (GFX_VER >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 11;
         } else if (GFX_VERx10 == 75) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 12;
         } else {
            /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
            vfe.PerThreadScratchSpace =
               cs_bin->prog_data->total_scratch / 1024 - 1;
         }
         vfe.ScratchSpaceBasePointer =
            get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
      }
   }

   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
      .KernelStartPointer =
         cs_bin->kernel.offset +
         elk_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
      .SamplerCount = get_sampler_count(cs_bin),
      /* We add 1 because the CS indirect parameters buffer isn't accounted
       * for in bind_map.surface_count.
       */
      .BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
      .BarrierEnable = cs_prog_data->uses_barrier,
      .SharedLocalMemorySize =
         elk_encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),

#if GFX_VERx10 != 75
      .ConstantURBEntryReadOffset = 0,
#endif
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GFX_VERx10 >= 75
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif

      .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
   };
   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                        pipeline->interface_descriptor_data,
                                        &desc);
}
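
/* Illustrative only: sanity-checking the per-generation PerThreadScratchSpace
 * encodings above with a concrete size. Both power-of-two encodings fall out
 * of ffs(): for a 4kB allocation, ffs(4096) returns 13, giving 13 - 11 = 2 on
 * Broadwell (2 = 4k) and 13 - 12 = 1 on Haswell (1 = 4k). On IVB/BYT the
 * field is linear, so 4kB encodes as 4096 / 1024 - 1 = 3. A hypothetical
 * self-test, not part of the driver:
 */
#if 0
#include <assert.h>
#include <strings.h>   /* ffs() */

static void
check_scratch_encodings(void)
{
   const uint32_t scratch = 4 * 1024;
   assert(ffs(scratch) - 11 == 2);   /* BDW: 0 = 1k, power of two, 4k -> 2 */
   assert(ffs(scratch) - 12 == 1);   /* HSW: 0 = 2k, power of two, 4k -> 1 */
   assert(scratch / 1024 - 1 == 3);  /* IVB/BYT: 0 = 1kB, linear, 4kB -> 3 */
}
#endif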
1960