/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "genxml/genX_rt_pack.h"

#include "common/intel_genX_state_elk.h"
#include "common/intel_l3_config.h"
#include "common/intel_sample_positions.h"
#include "nir/nir_xfb_info.h"
#include "vk_util.h"
#include "vk_format.h"
#include "vk_log.h"
#include "vk_render_pass.h"

static uint32_t
vertex_element_comp_control(enum isl_format format, unsigned comp)
{
   uint8_t bits;
   switch (comp) {
   case 0: bits = isl_format_layouts[format].channels.r.bits; break;
   case 1: bits = isl_format_layouts[format].channels.g.bits; break;
   case 2: bits = isl_format_layouts[format].channels.b.bits; break;
   case 3: bits = isl_format_layouts[format].channels.a.bits; break;
   default: unreachable("Invalid component");
   }

   /*
    * Take into account hardware restrictions when dealing with 64-bit floats.
    *
    * From Broadwell spec, command reference structures, page 586:
    *  "When SourceElementFormat is set to one of the *64*_PASSTHRU formats,
    *   64-bit components are stored in the URB without any conversion. In
    *   this case, vertex elements must be written as 128 or 256 bits, with
    *   VFCOMP_STORE_0 being used to pad the output as required. E.g., if
    *   R64_PASSTHRU is used to copy a 64-bit Red component into the URB,
    *   Component 1 must be specified as VFCOMP_STORE_0 (with Components 2,3
    *   set to VFCOMP_NOSTORE) in order to output a 128-bit vertex element, or
    *   Components 1-3 must be specified as VFCOMP_STORE_0 in order to output
    *   a 256-bit vertex element. Likewise, use of R64G64B64_PASSTHRU requires
    *   Component 3 to be specified as VFCOMP_STORE_0 in order to output a
    *   256-bit vertex element."
    */
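   /* A worked example of the quote above: for R64_PASSTHRU, component 0 has
    * bits and returns VFCOMP_STORE_SRC, component 1 falls through to
    * VFCOMP_STORE_0 padding, and components 2-3 hit the ISL_RAW check below
    * and return VFCOMP_NOSTORE, producing the required 128-bit element.
    */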
   if (bits) {
      return VFCOMP_STORE_SRC;
   } else if (comp >= 2 &&
              !isl_format_layouts[format].channels.b.bits &&
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* When emitting 64-bit attributes, we need to write either 128 or 256
       * bit chunks, using VFCOMP_NOSTORE when not writing the chunk, and
       * VFCOMP_STORE_0 to pad the written chunk */
      return VFCOMP_NOSTORE;
   } else if (comp < 3 ||
              isl_format_layouts[format].channels.r.type == ISL_RAW) {
      /* Note we need to pad with value 0, not 1, due to hardware restrictions
       * (see comment above) */
      return VFCOMP_STORE_0;
   } else if (isl_format_layouts[format].channels.r.type == ISL_UINT ||
              isl_format_layouts[format].channels.r.type == ISL_SINT) {
      assert(comp == 3);
      return VFCOMP_STORE_1_INT;
   } else {
      assert(comp == 3);
      return VFCOMP_STORE_1_FP;
   }
}

static void
emit_vertex_input(struct anv_graphics_pipeline *pipeline,
                  const struct vk_vertex_input_state *vi)
{
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);

   /* Pull inputs_read out of the VS prog data */
   const uint64_t inputs_read = vs_prog_data->inputs_read;
   const uint64_t double_inputs_read =
      vs_prog_data->double_inputs_read & inputs_read;
   assert((inputs_read & ((1 << VERT_ATTRIB_GENERIC0) - 1)) == 0);
   const uint32_t elements = inputs_read >> VERT_ATTRIB_GENERIC0;
   const uint32_t elements_double = double_inputs_read >> VERT_ATTRIB_GENERIC0;
   const bool needs_svgs_elem = vs_prog_data->uses_vertexid ||
                                vs_prog_data->uses_instanceid ||
                                vs_prog_data->uses_firstvertex ||
                                vs_prog_data->uses_baseinstance;

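   /* Each 64-bit attribute wide enough to occupy two input slots sets two
    * bits in elements_double (an assumption about how the compiler reports
    * double_inputs_read), so each pair collapses to one vertex element.
    */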
   uint32_t elem_count = __builtin_popcount(elements) -
      __builtin_popcount(elements_double) / 2;

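   /* The VF unit needs at least one valid vertex element even when the
    * shader reads no attributes; the MAX2 below reserves that slot, and the
    * loop further down fills unused elements with zeros.
    */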
   const uint32_t total_elems =
      MAX2(1, elem_count + needs_svgs_elem + vs_prog_data->uses_drawid);

   uint32_t *p;

   const uint32_t num_dwords = 1 + total_elems * 2;
   p = anv_batch_emitn(&pipeline->base.batch, num_dwords,
                       GENX(3DSTATE_VERTEX_ELEMENTS));
   if (!p)
      return;

   for (uint32_t i = 0; i < total_elems; i++) {
      /* The SKL docs for VERTEX_ELEMENT_STATE say:
       *
       *    "All elements must be valid from Element[0] to the last valid
       *    element. (I.e. if Element[2] is valid then Element[1] and
       *    Element[0] must also be valid)."
       *
       * The SKL docs for 3D_Vertex_Component_Control say:
       *
       *    "Don't store this component. (Not valid for Component 0, but can
       *    be used for Component 1-3)."
       *
       * So we can't just leave a vertex element blank and hope for the best.
       * We have to tell the VF hardware to put something in it; so we just
       * store a bunch of zeros.
       *
       * TODO: Compact vertex elements so we never end up with holes.
       */
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + i * 2], &element);
   }

   u_foreach_bit(a, vi->attributes_valid) {
      enum isl_format format = anv_get_isl_format(pipeline->base.device->info,
                                                  vi->attributes[a].format,
                                                  VK_IMAGE_ASPECT_COLOR_BIT,
                                                  VK_IMAGE_TILING_LINEAR);
      assume(format < ISL_NUM_FORMATS);

      uint32_t binding = vi->attributes[a].binding;
      assert(binding < MAX_VBS);

      if ((elements & (1 << a)) == 0)
         continue; /* Binding unused */

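      /* Compact the element index: count the enabled attributes below this
       * one, collapsing double-slot pairs the same way elem_count did.
       */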
      uint32_t slot =
         __builtin_popcount(elements & ((1 << a) - 1)) -
         DIV_ROUND_UP(__builtin_popcount(elements_double &
                                        ((1 << a) - 1)), 2);

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = vi->attributes[a].binding,
         .Valid = true,
         .SourceElementFormat = format,
         .EdgeFlagEnable = false,
         .SourceElementOffset = vi->attributes[a].offset,
         .Component0Control = vertex_element_comp_control(format, 0),
         .Component1Control = vertex_element_comp_control(format, 1),
         .Component2Control = vertex_element_comp_control(format, 2),
         .Component3Control = vertex_element_comp_control(format, 3),
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + slot * 2], &element);

#if GFX_VER >= 8
      /* On Broadwell and later, we have a separate VF_INSTANCING packet
       * that controls instancing.  On Haswell and prior, that's part of
       * VERTEX_BUFFER_STATE which we emit later.
       */
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         bool per_instance = pipeline->vb[binding].instanced;
         uint32_t divisor = pipeline->vb[binding].instance_divisor *
                            pipeline->instance_multiplier;

         vfi.InstancingEnable = per_instance;
         vfi.VertexElementIndex = slot;
         vfi.InstanceDataStepRate = per_instance ? divisor : 1;
      }
#endif
   }

   const uint32_t id_slot = elem_count;
   if (needs_svgs_elem) {
      /* From the Broadwell PRM for the 3D_Vertex_Component_Control enum:
       *    "Within a VERTEX_ELEMENT_STATE structure, if a Component
       *    Control field is set to something other than VFCOMP_STORE_SRC,
       *    no higher-numbered Component Control fields may be set to
       *    VFCOMP_STORE_SRC"
       *
       * This means that if we have BaseInstance, we need BaseVertex as
       * well.  Just do all or nothing.
       */
      uint32_t base_ctrl = (vs_prog_data->uses_firstvertex ||
                            vs_prog_data->uses_baseinstance) ?
                           VFCOMP_STORE_SRC : VFCOMP_STORE_0;

      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_SVGS_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
         .Component0Control = base_ctrl,
         .Component1Control = base_ctrl,
#if GFX_VER >= 8
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
#else
         .Component2Control = VFCOMP_STORE_VID,
         .Component3Control = VFCOMP_STORE_IID,
#endif
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, &p[1 + id_slot * 2], &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = id_slot;
      }
#endif
   }

#if GFX_VER >= 8
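   /* On gfx8+ the VF can inject VertexID/InstanceID by itself; point it at
    * components 2 and 3 of the SVGS element, mirroring the gfx7
    * STORE_VID/STORE_IID layout above.
    */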
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_SGVS), sgvs) {
      sgvs.VertexIDEnable              = vs_prog_data->uses_vertexid;
      sgvs.VertexIDComponentNumber     = 2;
      sgvs.VertexIDElementOffset       = id_slot;
      sgvs.InstanceIDEnable            = vs_prog_data->uses_instanceid;
      sgvs.InstanceIDComponentNumber   = 3;
      sgvs.InstanceIDElementOffset     = id_slot;
   }
#endif

   const uint32_t drawid_slot = elem_count + needs_svgs_elem;
   if (vs_prog_data->uses_drawid) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .VertexBufferIndex = ANV_DRAWID_VB_INDEX,
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32_UINT,
         .Component0Control = VFCOMP_STORE_SRC,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = VFCOMP_STORE_0,
         .Component3Control = VFCOMP_STORE_0,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL,
                                      &p[1 + drawid_slot * 2],
                                      &element);

#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_INSTANCING), vfi) {
         vfi.VertexElementIndex = drawid_slot;
      }
#endif
   }
}

void
genX(emit_urb_setup)(struct anv_device *device, struct anv_batch *batch,
                     const struct intel_l3_config *l3_config,
                     VkShaderStageFlags active_stages,
                     const unsigned entry_size[4],
                     enum intel_urb_deref_block_size *deref_block_size)
{
   const struct intel_device_info *devinfo = device->info;
   struct intel_urb_config urb_cfg = {
      .size = { entry_size[0], entry_size[1], entry_size[2], entry_size[3], },
   };

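   /* Let the shared helper partition the URB among the active VUE stages
    * (VS/HS/DS/GS) for this L3 configuration.
    */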
   bool constrained;
   intel_get_urb_config(devinfo, l3_config,
                        active_stages &
                           VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
                        active_stages & VK_SHADER_STAGE_GEOMETRY_BIT,
                        &urb_cfg, deref_block_size, &constrained);

#if GFX_VERx10 == 70
   /* From the IVB PRM Vol. 2, Part 1, Section 3.2.1:
    *
    *    "A PIPE_CONTROL with Post-Sync Operation set to 1h and a depth stall
    *    needs to be sent just prior to any 3DSTATE_VS, 3DSTATE_URB_VS,
    *    3DSTATE_CONSTANT_VS, 3DSTATE_BINDING_TABLE_POINTER_VS,
    *    3DSTATE_SAMPLER_STATE_POINTER_VS command.  Only one PIPE_CONTROL
    *    needs to be sent before any combination of VS associated 3DSTATE."
    */
   anv_batch_emit(batch, GFX7_PIPE_CONTROL, pc) {
      pc.DepthStallEnable  = true;
      pc.PostSyncOperation = WriteImmediateData;
      pc.Address           = device->workaround_address;
   }
#endif

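   /* 3DSTATE_URB_VS, _HS, _DS, and _GS have consecutive command
    * sub-opcodes, so one packet template can emit all four by bumping the
    * sub-opcode below.
    */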
   for (int i = 0; i <= MESA_SHADER_GEOMETRY; i++) {
      anv_batch_emit(batch, GENX(3DSTATE_URB_VS), urb) {
         urb._3DCommandSubOpcode      += i;
         urb.VSURBStartingAddress      = urb_cfg.start[i];
         urb.VSURBEntryAllocationSize  = urb_cfg.size[i] - 1;
         urb.VSNumberofURBEntries      = urb_cfg.entries[i];
      }
   }
}

static void
emit_urb_setup(struct anv_graphics_pipeline *pipeline,
               enum intel_urb_deref_block_size *deref_block_size)
{
   unsigned entry_size[4];
   for (int i = MESA_SHADER_VERTEX; i <= MESA_SHADER_GEOMETRY; i++) {
      const struct elk_vue_prog_data *prog_data =
         !anv_pipeline_has_stage(pipeline, i) ? NULL :
         (const struct elk_vue_prog_data *) pipeline->shaders[i]->prog_data;

      entry_size[i] = prog_data ? prog_data->urb_entry_size : 1;
   }

   genX(emit_urb_setup)(pipeline->base.device, &pipeline->base.batch,
                        pipeline->base.l3_config,
                        pipeline->active_stages, entry_size,
                        deref_block_size);
}

static void
emit_3dstate_sbe(struct anv_graphics_pipeline *pipeline)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE), sbe);
#if GFX_VER >= 8
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ), sbe);
#endif
      return;
   }

   struct GENX(3DSTATE_SBE) sbe = {
      GENX(3DSTATE_SBE_header),
      .AttributeSwizzleEnable = anv_pipeline_is_primitive(pipeline),
      .PointSpriteTextureCoordinateOrigin = UPPERLEFT,
      .NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs,
      .ConstantInterpolationEnable = wm_prog_data->flat_inputs,
   };

#if GFX_VER >= 8
   /* On Broadwell, they broke 3DSTATE_SBE into two packets */
   struct GENX(3DSTATE_SBE_SWIZ) swiz = {
      GENX(3DSTATE_SBE_SWIZ_header),
   };
#else
#  define swiz sbe
#endif

   const struct intel_vue_map *fs_input_map =
      &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map;

   int first_slot = elk_compute_first_urb_slot_required(wm_prog_data->inputs,
                                                        fs_input_map);
   assert(first_slot % 2 == 0);
   unsigned urb_entry_read_offset = first_slot / 2;
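   /* URB entries are read in 256-bit units, i.e. two vec4 slots at a time,
    * hence the even first_slot and the divide by two above; the read length
    * computed below likewise counts pairs of slots.
    */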
   int max_source_attr = 0;
   for (uint8_t idx = 0; idx < wm_prog_data->urb_setup_attribs_count; idx++) {
      uint8_t attr = wm_prog_data->urb_setup_attribs[idx];
      int input_index = wm_prog_data->urb_setup[attr];

      assert(0 <= input_index);

      /* gl_Viewport, gl_Layer and FragmentShadingRateKHR are stored in the
       * VUE header
       */
      if (attr == VARYING_SLOT_VIEWPORT ||
          attr == VARYING_SLOT_LAYER ||
          attr == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
         continue;
      }

      if (attr == VARYING_SLOT_PNTC) {
         sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
         continue;
      }

      const int slot = fs_input_map->varying_to_slot[attr];

      if (slot == -1) {
         /* This attribute does not exist in the VUE--that means that the
          * vertex shader did not write to it. It could be that it's a regular
          * varying read by the fragment shader but not written by the vertex
          * shader or it's gl_PrimitiveID. In the first case the value is
          * undefined, in the second it needs to be gl_PrimitiveID.
          */
         swiz.Attribute[input_index].ConstantSource = PRIM_ID;
         swiz.Attribute[input_index].ComponentOverrideX = true;
         swiz.Attribute[input_index].ComponentOverrideY = true;
         swiz.Attribute[input_index].ComponentOverrideZ = true;
         swiz.Attribute[input_index].ComponentOverrideW = true;
         continue;
      }

      /* We have to subtract two slots to account for the URB entry output
       * read offset in the VS and GS stages.
       */
      const int source_attr = slot - 2 * urb_entry_read_offset;
      assert(source_attr >= 0 && source_attr < 32);
      max_source_attr = MAX2(max_source_attr, source_attr);
      /* The hardware can only apply overrides to the first 16 attributes;
       * the other up-to-16 have to be lined up so that the input index
       * equals the output index. We'll need to do some tweaking to make
       * sure that's the case.
       */
      if (input_index < 16)
         swiz.Attribute[input_index].SourceAttribute = source_attr;
      else
         assert(source_attr == input_index);
   }

   sbe.VertexURBEntryReadOffset = urb_entry_read_offset;
   sbe.VertexURBEntryReadLength = DIV_ROUND_UP(max_source_attr + 1, 2);
#if GFX_VER >= 8
   sbe.ForceVertexURBEntryReadOffset = true;
   sbe.ForceVertexURBEntryReadLength = true;
#endif

   uint32_t *dw = anv_batch_emit_dwords(&pipeline->base.batch,
                                        GENX(3DSTATE_SBE_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_pack)(&pipeline->base.batch, dw, &sbe);

#if GFX_VER >= 8
   dw = anv_batch_emit_dwords(&pipeline->base.batch, GENX(3DSTATE_SBE_SWIZ_length));
   if (!dw)
      return;
   GENX(3DSTATE_SBE_SWIZ_pack)(&pipeline->base.batch, dw, &swiz);
#endif
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
VkPolygonMode
genX(raster_polygon_mode)(struct anv_graphics_pipeline *pipeline,
                          VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return pipeline->polygon_mode;
      }
      unreachable("Unsupported TES output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return pipeline->polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

uint32_t
genX(ms_rasterization_mode)(struct anv_graphics_pipeline *pipeline,
                            VkPolygonMode raster_mode)
{
#if GFX_VER <= 7
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      switch (pipeline->line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         return MSRASTMODE_ON_PATTERN;

      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
         return MSRASTMODE_OFF_PIXEL;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      return pipeline->rasterization_samples > 1 ?
         MSRASTMODE_ON_PATTERN : MSRASTMODE_OFF_PIXEL;
   }
#else
   unreachable("Only on gen7");
#endif
}

const uint32_t genX(vk_to_intel_cullmode)[] = {
   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
};

const uint32_t genX(vk_to_intel_fillmode)[] = {
   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
};

const uint32_t genX(vk_to_intel_front_face)[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
   [VK_FRONT_FACE_CLOCKWISE]                 = 0
};

void
genX(rasterization_mode)(VkPolygonMode raster_mode,
                         VkLineRasterizationModeEXT line_mode,
                         float line_width,
                         uint32_t *api_mode,
                         bool *msaa_rasterization_enable)
{
#if GFX_VER >= 8
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful.  Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API.  Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX100;
         /* The algorithm the HW uses to draw wide lines doesn't quite match
          * what the CTS expects, at least for rectangular lines, so we
          * disable MSAA rasterization for wide lines, making the hardware
          * draw parallelograms instead, which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX100;
      *msaa_rasterization_enable = true;
   }
#else
   unreachable("Invalid call");
#endif
}

static void
emit_rs_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_input_assembly_state *ia,
              const struct vk_rasterization_state *rs,
              const struct vk_multisample_state *ms,
              const struct vk_render_pass_state *rp,
              enum intel_urb_deref_block_size urb_deref_block_size)
{
   struct GENX(3DSTATE_SF) sf = {
      GENX(3DSTATE_SF_header),
   };

   sf.ViewportTransformEnable = true;
   sf.StatisticsEnable = true;
   sf.VertexSubPixelPrecisionSelect = _8Bit;
   sf.AALineDistanceMode = true;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 0;
      sf.LineStripListProvokingVertexSelect = 0;
      sf.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      sf.TriangleStripListProvokingVertexSelect = 2;
      sf.LineStripListProvokingVertexSelect = 1;
      sf.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

#if GFX_VERx10 == 75
   sf.LineStippleEnable = rs->line.stipple.enable;
#endif

   bool point_from_shader;
   const struct elk_vue_prog_data *last_vue_prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   point_from_shader = last_vue_prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ;

   if (point_from_shader) {
      sf.PointWidthSource = Vertex;
   } else {
      sf.PointWidthSource = State;
      sf.PointWidth = 1.0;
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_RASTER) raster = {
      GENX(3DSTATE_RASTER_header),
   };
#else
#  define raster sf
#endif

   /* For details on 3DSTATE_RASTER multisample state, see the BSpec table
    * "Multisample Modes State".
    */
#if GFX_VER >= 8
   /* NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the BDW and SKL PMA fix
    * computations.  If we ever set this bit to a different value, they will
    * need to be updated accordingly.
    */
   raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
   raster.ForceMultisampling = false;
#endif

   raster.FrontFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.BackFaceFillMode = genX(vk_to_intel_fillmode)[rs->polygon_mode];
   raster.ScissorRectangleEnable = true;

#if GFX_VER >= 8
   raster.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

#if GFX_VER == 7
   /* Gfx7 requires that we provide the depth format in 3DSTATE_SF so that it
    * can get the depth offsets correct.
    */
   if (rp != NULL &&
       rp->depth_attachment_format != VK_FORMAT_UNDEFINED) {
      assert(vk_format_has_depth(rp->depth_attachment_format));
      enum isl_format isl_format =
         anv_get_isl_format(pipeline->base.device->info,
                            rp->depth_attachment_format,
                            VK_IMAGE_ASPECT_DEPTH_BIT,
                            VK_IMAGE_TILING_OPTIMAL);
      sf.DepthBufferSurfaceFormat =
         isl_format_get_depth_format(isl_format, false);
   }
#endif

#if GFX_VER >= 8
   GENX(3DSTATE_SF_pack)(NULL, pipeline->gfx8.sf, &sf);
   GENX(3DSTATE_RASTER_pack)(NULL, pipeline->gfx8.raster, &raster);
#else
#  undef raster
   GENX(3DSTATE_SF_pack)(NULL, &pipeline->gfx7.sf, &sf);
#endif
}

static void
emit_ms_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_multisample_state *ms)
{
#if GFX_VER >= 8
   /* On Gfx8+ 3DSTATE_MULTISAMPLE only holds the number of samples. */
   genX(emit_multisample)(&pipeline->base.batch,
                          pipeline->rasterization_samples,
                          NULL);
#endif

   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits on gfx8+ and 8 bits on gfx7.
    */
#if GFX_VER >= 8
   uint32_t sample_mask = 0xffff;
#else
   uint32_t sample_mask = 0xff;
#endif

   if (ms != NULL)
      sample_mask &= ms->sample_mask;

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
      sm.SampleMask = sample_mask;
   }
}

const uint32_t genX(vk_to_intel_logic_op)[] = {
   [VK_LOGIC_OP_COPY]                        = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]                       = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]                         = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]                 = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]                = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]                       = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]                         = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]                          = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]                         = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]                  = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]                      = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]                  = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED]               = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]                 = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]                        = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]                         = LOGICOP_SET,
};

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                    = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                     = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]               = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]     = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]               = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]     = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]               = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]     = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]               = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]     = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]          = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]          = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]      = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]              = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]    = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]              = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]    = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD]                         = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]                    = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT]            = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]                         = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]                         = BLENDFUNCTION_MAX,
};

const uint32_t genX(vk_to_intel_compare_op)[] = {
   [VK_COMPARE_OP_NEVER]                        = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS]                         = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL]                        = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER]                      = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROP_ALWAYS,
};

const uint32_t genX(vk_to_intel_stencil_op)[] = {
   [VK_STENCIL_OP_KEEP]                         = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                         = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                      = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP]          = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP]          = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]                       = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]           = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]           = STENCILOP_DECR,
};

const uint32_t genX(vk_to_intel_primitive_type)[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static bool
is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline uint32_t *
write_disabled_blend(uint32_t *state)
{
   struct GENX(BLEND_STATE_ENTRY) entry = {
      .WriteDisableAlpha = true,
      .WriteDisableRed = true,
      .WriteDisableGreen = true,
      .WriteDisableBlue = true,
   };
   GENX(BLEND_STATE_ENTRY_pack)(NULL, state, &entry);
   return state + GENX(BLEND_STATE_ENTRY_length);
}

static void
emit_cb_state(struct anv_graphics_pipeline *pipeline,
              const struct vk_color_blend_state *cb,
              const struct vk_multisample_state *ms)
{
   struct anv_device *device = pipeline->base.device;
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(BLEND_STATE) blend_state = {
#if GFX_VER >= 8
      .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
      .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
   };

   uint32_t surface_count = 0;
   struct anv_pipeline_bind_map *map;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      map = &pipeline->shaders[MESA_SHADER_FRAGMENT]->bind_map;
      surface_count = map->surface_count;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *blend_state_start = devinfo->ver >= 8 ?
      pipeline->gfx8.blend_state : pipeline->gfx7.blend_state;
   uint32_t *state_pos = blend_state_start;
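
   /* The BLEND_STATE dwords are followed directly by one BLEND_STATE_ENTRY
    * per color attachment, so skip past the header before packing entries.
    */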
   state_pos += GENX(BLEND_STATE_length);
#if GFX_VER >= 8
   struct GENX(BLEND_STATE_ENTRY) bs0 = { 0 };
#endif
   for (unsigned i = 0; i < surface_count; i++) {
      struct anv_pipeline_binding *binding = &map->surface_to_descriptor[i];

      /* All color attachments are at the beginning of the binding table */
      if (binding->set != ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS)
         break;

      /* We can have at most 8 attachments */
      assert(i < MAX_RTS);

      if (cb == NULL || binding->index >= cb->attachment_count) {
         state_pos = write_disabled_blend(state_pos);
         continue;
      }

      const struct vk_color_blend_attachment_state *a =
         &cb->attachments[binding->index];

      struct GENX(BLEND_STATE_ENTRY) entry = {
#if GFX_VER < 8
         .AlphaToCoverageEnable = ms && ms->alpha_to_coverage_enable,
         .AlphaToOneEnable = ms && ms->alpha_to_one_enable,
#endif
         .LogicOpEnable = cb->logic_op_enable,

         /* Vulkan specification 1.2.168, VkLogicOp:
          *
          *   "Logical operations are controlled by the logicOpEnable and
          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
          *    logicOpEnable is VK_TRUE, then a logical operation selected by
          *    logicOp is applied between each color attachment and the
          *    fragment’s corresponding output value, and blending of all
          *    attachments is treated as if it were disabled."
          *
          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
          * BLEND_STATE_ENTRY:
          *
          *   "Enabling LogicOp and Color Buffer Blending at the same time is
          *    UNDEFINED"
          */
         .ColorBufferBlendEnable = !cb->logic_op_enable && a->blend_enable,
         .ColorClampRange = COLORCLAMP_RTFORMAT,
         .PreBlendColorClampEnable = true,
         .PostBlendColorClampEnable = true,
         .SourceBlendFactor = vk_to_intel_blend[a->src_color_blend_factor],
         .DestinationBlendFactor = vk_to_intel_blend[a->dst_color_blend_factor],
         .ColorBlendFunction = vk_to_intel_blend_op[a->color_blend_op],
         .SourceAlphaBlendFactor = vk_to_intel_blend[a->src_alpha_blend_factor],
         .DestinationAlphaBlendFactor = vk_to_intel_blend[a->dst_alpha_blend_factor],
         .AlphaBlendFunction = vk_to_intel_blend_op[a->alpha_blend_op],
      };

      if (a->src_color_blend_factor != a->src_alpha_blend_factor ||
          a->dst_color_blend_factor != a->dst_alpha_blend_factor ||
          a->color_blend_op != a->alpha_blend_op) {
#if GFX_VER >= 8
         blend_state.IndependentAlphaBlendEnable = true;
#else
         entry.IndependentAlphaBlendEnable = true;
#endif
      }

      /* The Dual Source Blending documentation says:
       *
       * "If SRC1 is included in a src/dst blend factor and
       * a DualSource RT Write message is not used, results
       * are UNDEFINED. (This reflects the same restriction in DX APIs,
       * where undefined results are produced if “o1” is not written
       * by a PS – there are no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation
       * so we just disable the blending to prevent possible issues.
       */
      if (!wm_prog_data->dual_src_blend &&
          (is_dual_src_blend_factor(a->src_color_blend_factor) ||
           is_dual_src_blend_factor(a->dst_color_blend_factor) ||
           is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
           is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
         vk_logw(VK_LOG_OBJS(&device->vk.base),
                 "Enabled dual-src blend factors without writing both targets "
                 "in the shader.  Disabling blending to avoid GPU hangs.");
         entry.ColorBufferBlendEnable = false;
      }

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used.  Technically, this means the
       * hardware can do MORE than GL or Vulkan specify.  However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      if (a->color_blend_op == VK_BLEND_OP_MIN ||
          a->color_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationBlendFactor = BLENDFACTOR_ONE;
      }
      if (a->alpha_blend_op == VK_BLEND_OP_MIN ||
          a->alpha_blend_op == VK_BLEND_OP_MAX) {
         entry.SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         entry.DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      }
      GENX(BLEND_STATE_ENTRY_pack)(NULL, state_pos, &entry);
      state_pos += GENX(BLEND_STATE_ENTRY_length);
#if GFX_VER >= 8
      if (i == 0)
         bs0 = entry;
#endif
   }

#if GFX_VER >= 8
   struct GENX(3DSTATE_PS_BLEND) blend = {
      GENX(3DSTATE_PS_BLEND_header),
   };
   blend.AlphaToCoverageEnable         = blend_state.AlphaToCoverageEnable;
   blend.ColorBufferBlendEnable        = bs0.ColorBufferBlendEnable;
   blend.SourceAlphaBlendFactor        = bs0.SourceAlphaBlendFactor;
   blend.DestinationAlphaBlendFactor   = bs0.DestinationAlphaBlendFactor;
   blend.SourceBlendFactor             = bs0.SourceBlendFactor;
   blend.DestinationBlendFactor        = bs0.DestinationBlendFactor;
   blend.AlphaTestEnable               = false;
   blend.IndependentAlphaBlendEnable   = blend_state.IndependentAlphaBlendEnable;

   GENX(3DSTATE_PS_BLEND_pack)(NULL, pipeline->gfx8.ps_blend, &blend);
#endif

   GENX(BLEND_STATE_pack)(NULL, blend_state_start, &blend_state);
}

static void
emit_3dstate_clip(struct anv_graphics_pipeline *pipeline,
                  const struct vk_input_assembly_state *ia,
                  const struct vk_viewport_state *vp,
                  const struct vk_rasterization_state *rs)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   (void) wm_prog_data;

   struct GENX(3DSTATE_CLIP) clip = {
      GENX(3DSTATE_CLIP_header),
   };

   clip.ClipEnable               = true;
   clip.StatisticsEnable         = true;
   clip.EarlyCullEnable          = true;
   clip.APIMode                  = pipeline->negative_one_to_one ? APIMODE_OGL : APIMODE_D3D;
   clip.GuardbandClipTestEnable  = true;

#if GFX_VER >= 8
   clip.VertexSubPixelPrecisionSelect = _8Bit;
#endif
   clip.ClipMode = CLIPMODE_NORMAL;

   switch (rs->provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 0;
      clip.LineStripListProvokingVertexSelect = 0;
      clip.TriangleFanProvokingVertexSelect = 1;
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      clip.TriangleStripListProvokingVertexSelect = 2;
      clip.LineStripListProvokingVertexSelect = 1;
      clip.TriangleFanProvokingVertexSelect = 2;
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }

   clip.MinimumPointWidth = 0.125;
   clip.MaximumPointWidth = 255.875;

   const struct elk_vue_prog_data *last =
      anv_pipeline_get_last_vue_prog_data(pipeline);

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with ViewportIndex,
    *    then the first viewport is used."
    */
   if (vp && (last->vue_map.slots_valid & VARYING_BIT_VIEWPORT)) {
      clip.MaximumVPIndex = vp->viewport_count > 0 ?
         vp->viewport_count - 1 : 0;
   } else {
      clip.MaximumVPIndex = 0;
   }

   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with Layer, then the
    *    first layer is used."
    */
   clip.ForceZeroRTAIndexEnable =
      !(last->vue_map.slots_valid & VARYING_BIT_LAYER);

#if GFX_VER == 7
   clip.UserClipDistanceClipTestEnableBitmask = last->clip_distance_mask;
   clip.UserClipDistanceCullTestEnableBitmask = last->cull_distance_mask;
   clip.FrontWinding            = genX(vk_to_intel_front_face)[rs->front_face];
   clip.CullMode                = genX(vk_to_intel_cullmode)[rs->cull_mode];
   clip.ViewportZClipTestEnable = pipeline->depth_clip_enable;
#endif

   clip.NonPerspectiveBarycentricEnable = wm_prog_data ?
      wm_prog_data->uses_nonperspective_interp_modes : 0;

   GENX(3DSTATE_CLIP_pack)(NULL, pipeline->gfx7.clip, &clip);
}

static void
emit_3dstate_streamout(struct anv_graphics_pipeline *pipeline,
                       const struct vk_rasterization_state *rs)
{
   const struct elk_vue_prog_data *prog_data =
      anv_pipeline_get_last_vue_prog_data(pipeline);
   const struct intel_vue_map *vue_map = &prog_data->vue_map;

   nir_xfb_info *xfb_info;
   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY))
      xfb_info = pipeline->shaders[MESA_SHADER_GEOMETRY]->xfb_info;
   else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      xfb_info = pipeline->shaders[MESA_SHADER_TESS_EVAL]->xfb_info;
   else
      xfb_info = pipeline->shaders[MESA_SHADER_VERTEX]->xfb_info;

   if (xfb_info) {
      struct GENX(SO_DECL) so_decl[MAX_XFB_STREAMS][128];
      int next_offset[MAX_XFB_BUFFERS] = {0, 0, 0, 0};
      int decls[MAX_XFB_STREAMS] = {0, 0, 0, 0};

      memset(so_decl, 0, sizeof(so_decl));

      for (unsigned i = 0; i < xfb_info->output_count; i++) {
         const nir_xfb_output_info *output = &xfb_info->outputs[i];
         unsigned buffer = output->buffer;
         unsigned stream = xfb_info->buffer_to_stream[buffer];

         /* Our hardware is unusual in that it requires us to program SO_DECLs
          * for fake "hole" components, rather than simply taking the offset
          * for each real varying.  Each hole can have size 1, 2, 3, or 4; we
          * program as many size = 4 holes as we can, then a final hole to
          * accommodate the final 1, 2, or 3 remaining.
          */
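         /* E.g. a 7-dword gap becomes one 4-dword hole followed by a
          * 3-dword hole in the loop below.
          */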
         int hole_dwords = (output->offset - next_offset[buffer]) / 4;
         while (hole_dwords > 0) {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = 1,
               .OutputBufferSlot = buffer,
               .ComponentMask = (1 << MIN2(hole_dwords, 4)) - 1,
            };
            hole_dwords -= 4;
         }

         int varying = output->location;
         uint8_t component_mask = output->component_mask;
         /* VARYING_SLOT_PSIZ contains four scalar fields packed together:
          * - VARYING_SLOT_PRIMITIVE_SHADING_RATE in VARYING_SLOT_PSIZ.x
          * - VARYING_SLOT_LAYER                  in VARYING_SLOT_PSIZ.y
          * - VARYING_SLOT_VIEWPORT               in VARYING_SLOT_PSIZ.z
          * - VARYING_SLOT_PSIZ                   in VARYING_SLOT_PSIZ.w
          */
         if (varying == VARYING_SLOT_PRIMITIVE_SHADING_RATE) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 0; // SO_DECL_COMPMASK_X
         } else if (varying == VARYING_SLOT_LAYER) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 1; // SO_DECL_COMPMASK_Y
         } else if (varying == VARYING_SLOT_VIEWPORT) {
            varying = VARYING_SLOT_PSIZ;
            component_mask = 1 << 2; // SO_DECL_COMPMASK_Z
         } else if (varying == VARYING_SLOT_PSIZ) {
            component_mask = 1 << 3; // SO_DECL_COMPMASK_W
         }

         next_offset[buffer] = output->offset +
                               __builtin_popcount(component_mask) * 4;

         const int slot = vue_map->varying_to_slot[varying];
         if (slot < 0) {
            /* This can happen if the shader never writes to the varying.
             * Insert a hole instead of actual varying data.
             */
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .HoleFlag = true,
               .OutputBufferSlot = buffer,
               .ComponentMask = component_mask,
            };
         } else {
            so_decl[stream][decls[stream]++] = (struct GENX(SO_DECL)) {
               .OutputBufferSlot = buffer,
               .RegisterIndex = slot,
               .ComponentMask = component_mask,
            };
         }
      }

      int max_decls = 0;
      for (unsigned s = 0; s < MAX_XFB_STREAMS; s++)
         max_decls = MAX2(max_decls, decls[s]);

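      /* Build the per-stream buffer-enable masks: bit b of sbs[s] means
       * stream s writes transform feedback buffer b.
       */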
1180       uint8_t sbs[MAX_XFB_STREAMS] = { };
1181       for (unsigned b = 0; b < MAX_XFB_BUFFERS; b++) {
1182          if (xfb_info->buffers_written & (1 << b))
1183             sbs[xfb_info->buffer_to_stream[b]] |= 1 << b;
1184       }
1185 
1186       /* Wa_16011773973:
1187        * If SOL is enabled and SO_DECL state has to be programmed,
1188        *    1. Send 3D State SOL state with SOL disabled
1189        *    2. Send SO_DECL NP state
1190        *    3. Send 3D State SOL with SOL Enabled
1191        */
1192       if (intel_device_info_is_dg2(pipeline->base.device->info))
1193          anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_STREAMOUT), so);
1194 
1195       uint32_t *dw = anv_batch_emitn(&pipeline->base.batch, 3 + 2 * max_decls,
1196                                      GENX(3DSTATE_SO_DECL_LIST),
1197                                      .StreamtoBufferSelects0 = sbs[0],
1198                                      .StreamtoBufferSelects1 = sbs[1],
1199                                      .StreamtoBufferSelects2 = sbs[2],
1200                                      .StreamtoBufferSelects3 = sbs[3],
1201                                      .NumEntries0 = decls[0],
1202                                      .NumEntries1 = decls[1],
1203                                      .NumEntries2 = decls[2],
1204                                      .NumEntries3 = decls[3]);
1205 
1206       for (int i = 0; i < max_decls; i++) {
1207          GENX(SO_DECL_ENTRY_pack)(NULL, dw + 3 + i * 2,
1208             &(struct GENX(SO_DECL_ENTRY)) {
1209                .Stream0Decl = so_decl[0][i],
1210                .Stream1Decl = so_decl[1][i],
1211                .Stream2Decl = so_decl[2][i],
1212                .Stream3Decl = so_decl[3][i],
1213             });
1214       }
1215    }
1216 
1217 #if GFX_VER == 7
1218 #  define streamout_state_dw pipeline->gfx7.streamout_state
1219 #else
1220 #  define streamout_state_dw pipeline->gfx8.streamout_state
1221 #endif
1222 
1223    struct GENX(3DSTATE_STREAMOUT) so = {
1224       GENX(3DSTATE_STREAMOUT_header),
1225    };
1226 
1227    if (xfb_info) {
1228       so.SOFunctionEnable = true;
1229       so.SOStatisticsEnable = true;
1230 
1231       switch (rs->provoking_vertex) {
1232       case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
1233          so.ReorderMode = LEADING;
1234          break;
1235 
1236       case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
1237          so.ReorderMode = TRAILING;
1238          break;
1239 
1240       default:
1241          unreachable("Invalid provoking vertex mode");
1242       }
1243 
1244       so.RenderStreamSelect = rs->rasterization_stream;
1245 
1246 #if GFX_VER >= 8
1247       so.Buffer0SurfacePitch = xfb_info->buffers[0].stride;
1248       so.Buffer1SurfacePitch = xfb_info->buffers[1].stride;
1249       so.Buffer2SurfacePitch = xfb_info->buffers[2].stride;
1250       so.Buffer3SurfacePitch = xfb_info->buffers[3].stride;
1251 #else
1252       pipeline->gfx7.xfb_bo_pitch[0] = xfb_info->buffers[0].stride;
1253       pipeline->gfx7.xfb_bo_pitch[1] = xfb_info->buffers[1].stride;
1254       pipeline->gfx7.xfb_bo_pitch[2] = xfb_info->buffers[2].stride;
1255       pipeline->gfx7.xfb_bo_pitch[3] = xfb_info->buffers[3].stride;
1256 
1257       /* On Gfx7, the SO buffer enables live in 3DSTATE_STREAMOUT which
1258        * is a bit inconvenient because we don't know what buffers will
1259        * actually be enabled until draw time.  We do our best here by
1260        * setting them based on buffers_written and we disable them
1261        * as-needed at draw time by setting EndAddress = BaseAddress.
1262        */
1263       so.SOBufferEnable0 = xfb_info->buffers_written & (1 << 0);
1264       so.SOBufferEnable1 = xfb_info->buffers_written & (1 << 1);
1265       so.SOBufferEnable2 = xfb_info->buffers_written & (1 << 2);
1266       so.SOBufferEnable3 = xfb_info->buffers_written & (1 << 3);
1267 #endif
1268 
1269       int urb_entry_read_offset = 0;
1270       int urb_entry_read_length =
1271          (prog_data->vue_map.num_slots + 1) / 2 - urb_entry_read_offset;
1272 
1273       /* We always read the whole vertex.  This could be reduced at some
1274        * point by reading less and offsetting the register index in the
1275        * SO_DECLs.
1276        */
      so.Stream0VertexReadOffset = urb_entry_read_offset;
      so.Stream0VertexReadLength = urb_entry_read_length - 1;
      so.Stream1VertexReadOffset = urb_entry_read_offset;
      so.Stream1VertexReadLength = urb_entry_read_length - 1;
      so.Stream2VertexReadOffset = urb_entry_read_offset;
      so.Stream2VertexReadLength = urb_entry_read_length - 1;
      so.Stream3VertexReadOffset = urb_entry_read_offset;
      so.Stream3VertexReadLength = urb_entry_read_length - 1;
   }

   GENX(3DSTATE_STREAMOUT_pack)(NULL, streamout_state_dw, &so);
}

static uint32_t
get_sampler_count(const struct anv_shader_bin *bin)
{
   uint32_t count_by_4 = DIV_ROUND_UP(bin->bind_map.sampler_count, 4);

   /* We can potentially have way more than 32 samplers and that's ok.
    * However, the 3DSTATE_XS packets only have 3 bits to specify how
    * many to pre-fetch and all values above 4 are marked reserved.
    */
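   /* For example (assumed count): bind_map.sampler_count == 9 gives
    * DIV_ROUND_UP(9, 4) == 3, i.e. pre-fetch up to 12 samplers; any
    * count above 16 clamps to the encoding maximum of 4.
    */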
   return MIN2(count_by_4, 4);
}

static UNUSED struct anv_address
get_scratch_address(struct anv_pipeline *pipeline,
                    gl_shader_stage stage,
                    const struct anv_shader_bin *bin)
{
   return (struct anv_address) {
      .bo = anv_scratch_pool_alloc(pipeline->device,
                                   &pipeline->device->scratch_pool,
                                   stage, bin->prog_data->total_scratch),
      .offset = 0,
   };
}

static UNUSED uint32_t
get_scratch_space(const struct anv_shader_bin *bin)
{
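   /* The 3DSTATE_XS PerThreadScratchSpace field is a power-of-two encoding
    * where 0 means 1kB, 1 means 2kB, and so on.  A sketch of the math,
    * assuming total_scratch is either 0 or a power of two >= 1kB (which
    * the ffs() trick requires): total_scratch == 4096 yields
    * ffs(4096 / 2048) == ffs(2) == 2, i.e. 1kB << 2 == 4kB per thread.
    */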
   return ffs(bin->prog_data->total_scratch / 2048);
}

static void
emit_3dstate_vs(struct anv_graphics_pipeline *pipeline)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct elk_vs_prog_data *vs_prog_data = get_vs_prog_data(pipeline);
   const struct anv_shader_bin *vs_bin =
      pipeline->shaders[MESA_SHADER_VERTEX];

   assert(anv_pipeline_has_stage(pipeline, MESA_SHADER_VERTEX));

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VS), vs) {
      vs.Enable               = true;
      vs.StatisticsEnable     = true;
      vs.KernelStartPointer   = vs_bin->kernel.offset;
#if GFX_VER >= 8
      vs.SIMD8DispatchEnable  =
         vs_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8;
#endif

      assert(!vs_prog_data->base.base.use_alt_mode);
      vs.SingleVertexDispatch       = false;
      vs.VectorMaskEnable           = false;
      vs.SamplerCount               = get_sampler_count(vs_bin);
      vs.BindingTableEntryCount     = vs_bin->bind_map.surface_count;
      vs.FloatingPointMode          = IEEE754;
      vs.IllegalOpcodeExceptionEnable = false;
      vs.SoftwareExceptionEnable    = false;
      vs.MaximumNumberofThreads     = devinfo->max_vs_threads - 1;

      vs.VertexURBEntryReadLength      = vs_prog_data->base.urb_read_length;
      vs.VertexURBEntryReadOffset      = 0;
      vs.DispatchGRFStartRegisterForURBData =
         vs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      vs.UserClipDistanceClipTestEnableBitmask =
         vs_prog_data->base.clip_distance_mask;
      vs.UserClipDistanceCullTestEnableBitmask =
         vs_prog_data->base.cull_distance_mask;
#endif

      vs.PerThreadScratchSpace   = get_scratch_space(vs_bin);
      vs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_VERTEX, vs_bin);
   }
}

static void
emit_3dstate_hs_te_ds(struct anv_graphics_pipeline *pipeline,
                      const struct vk_tessellation_state *ts)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te);
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds);
      return;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *tcs_bin =
      pipeline->shaders[MESA_SHADER_TESS_CTRL];
   const struct anv_shader_bin *tes_bin =
      pipeline->shaders[MESA_SHADER_TESS_EVAL];

   const struct elk_tcs_prog_data *tcs_prog_data = get_tcs_prog_data(pipeline);
   const struct elk_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_HS), hs) {
      hs.Enable = true;
      hs.StatisticsEnable = true;
      hs.KernelStartPointer = tcs_bin->kernel.offset;
      hs.SamplerCount = get_sampler_count(tcs_bin);
      hs.BindingTableEntryCount = tcs_bin->bind_map.surface_count;

      hs.MaximumNumberofThreads = devinfo->max_tcs_threads - 1;
      hs.IncludeVertexHandles = true;
      hs.InstanceCount = tcs_prog_data->instances - 1;

      hs.VertexURBEntryReadLength = 0;
      hs.VertexURBEntryReadOffset = 0;
      hs.DispatchGRFStartRegisterForURBData =
         tcs_prog_data->base.base.dispatch_grf_start_reg & 0x1f;

      hs.PerThreadScratchSpace = get_scratch_space(tcs_bin);
      hs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_CTRL, tcs_bin);
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_TE), te) {
      te.Partitioning = tes_prog_data->partitioning;

      if (ts->domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         te.OutputTopology = tes_prog_data->output_topology;
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            te.OutputTopology = OUTPUT_TRI_CW;
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            te.OutputTopology = OUTPUT_TRI_CCW;
         } else {
            te.OutputTopology = tes_prog_data->output_topology;
         }
      }

      te.TEDomain = tes_prog_data->domain;
      te.TEEnable = true;
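      /* Hardware maxima: odd fractional partitioning tops out at a
       * tessellation factor of 63, everything else at 64.  This also
       * covers Vulkan's maxTessellationGenerationLevel of 64 that the
       * driver advertises.
       */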
      te.MaximumTessellationFactorOdd = 63.0;
      te.MaximumTessellationFactorNotOdd = 64.0;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_DS), ds) {
      ds.Enable = true;
      ds.StatisticsEnable = true;
      ds.KernelStartPointer = tes_bin->kernel.offset;
      ds.SamplerCount = get_sampler_count(tes_bin);
      ds.BindingTableEntryCount = tes_bin->bind_map.surface_count;
      ds.MaximumNumberofThreads = devinfo->max_tes_threads - 1;

      ds.ComputeWCoordinateEnable =
         tes_prog_data->domain == INTEL_TESS_DOMAIN_TRI;

      ds.PatchURBEntryReadLength = tes_prog_data->base.urb_read_length;
      ds.PatchURBEntryReadOffset = 0;
      ds.DispatchGRFStartRegisterForURBData =
         tes_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      ds.DispatchMode =
         tes_prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8 ?
         DISPATCH_MODE_SIMD8_SINGLE_PATCH :
         DISPATCH_MODE_SIMD4X2;

      ds.UserClipDistanceClipTestEnableBitmask =
         tes_prog_data->base.clip_distance_mask;
      ds.UserClipDistanceCullTestEnableBitmask =
         tes_prog_data->base.cull_distance_mask;
#endif

      ds.PerThreadScratchSpace = get_scratch_space(tes_bin);
      ds.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_TESS_EVAL, tes_bin);
   }
}

static void
emit_3dstate_gs(struct anv_graphics_pipeline *pipeline,
                const struct vk_rasterization_state *rs)
{
   const struct intel_device_info *devinfo = pipeline->base.device->info;
   const struct anv_shader_bin *gs_bin =
      pipeline->shaders[MESA_SHADER_GEOMETRY];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs);
      return;
   }

   const struct elk_gs_prog_data *gs_prog_data = get_gs_prog_data(pipeline);

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_GS), gs) {
      gs.Enable                  = true;
      gs.StatisticsEnable        = true;
      gs.KernelStartPointer      = gs_bin->kernel.offset;
      gs.DispatchMode            = gs_prog_data->base.dispatch_mode;

      gs.SingleProgramFlow       = false;
      gs.VectorMaskEnable        = false;
      gs.SamplerCount            = get_sampler_count(gs_bin);
      gs.BindingTableEntryCount  = gs_bin->bind_map.surface_count;
      gs.IncludeVertexHandles    = gs_prog_data->base.include_vue_handles;
      gs.IncludePrimitiveID      = gs_prog_data->include_primitive_id;

      if (GFX_VER == 8) {
         /* Broadwell is weird.  It needs us to divide by 2. */
         gs.MaximumNumberofThreads = devinfo->max_gs_threads / 2 - 1;
      } else {
         gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
      }

      gs.OutputVertexSize        = gs_prog_data->output_vertex_size_hwords * 2 - 1;
      gs.OutputTopology          = gs_prog_data->output_topology;
      gs.ControlDataFormat       = gs_prog_data->control_data_format;
      gs.ControlDataHeaderSize   = gs_prog_data->control_data_header_size_hwords;
      gs.InstanceControl         = MAX2(gs_prog_data->invocations, 1) - 1;

      switch (rs->provoking_vertex) {
      case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
         gs.ReorderMode = LEADING;
         break;

      case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
         gs.ReorderMode = TRAILING;
         break;

      default:
         unreachable("Invalid provoking vertex mode");
      }

#if GFX_VER >= 8
      gs.ExpectedVertexCount     = gs_prog_data->vertices_in;
      gs.StaticOutput            = gs_prog_data->static_vertex_count >= 0;
      gs.StaticOutputVertexCount = gs_prog_data->static_vertex_count >= 0 ?
                                   gs_prog_data->static_vertex_count : 0;
#endif

      gs.VertexURBEntryReadOffset = 0;
      gs.VertexURBEntryReadLength = gs_prog_data->base.urb_read_length;
      gs.DispatchGRFStartRegisterForURBData =
         gs_prog_data->base.base.dispatch_grf_start_reg;

#if GFX_VER >= 8
      gs.UserClipDistanceClipTestEnableBitmask =
         gs_prog_data->base.clip_distance_mask;
      gs.UserClipDistanceCullTestEnableBitmask =
         gs_prog_data->base.cull_distance_mask;
#endif

      gs.PerThreadScratchSpace   = get_scratch_space(gs_bin);
      gs.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_GEOMETRY, gs_bin);
   }
}

static bool
state_has_ds_self_dep(const struct vk_graphics_pipeline_state *state)
{
   return state->pipeline_flags &
      VK_PIPELINE_CREATE_DEPTH_STENCIL_ATTACHMENT_FEEDBACK_LOOP_BIT_EXT;
}

static void
emit_3dstate_wm(struct anv_graphics_pipeline *pipeline,
                const struct vk_input_assembly_state *ia,
                const struct vk_rasterization_state *rs,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb,
                const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   struct GENX(3DSTATE_WM) wm = {
      GENX(3DSTATE_WM_header),
   };
   wm.StatisticsEnable                    = true;
   wm.LineEndCapAntialiasingRegionWidth   = _05pixels;
   wm.LineAntialiasingRegionWidth         = _10pixels;
   wm.PointRasterizationRule              = RASTRULE_UPPER_LEFT;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      if (wm_prog_data->early_fragment_tests) {
         wm.EarlyDepthStencilControl         = EDSC_PREPS;
      } else if (wm_prog_data->has_side_effects) {
         wm.EarlyDepthStencilControl         = EDSC_PSEXEC;
      } else {
         wm.EarlyDepthStencilControl         = EDSC_NORMAL;
      }

#if GFX_VER >= 8
      /* Gen8 hardware tries to compute ThreadDispatchEnable for us but
       * doesn't take into account KillPixels when no depth or stencil
       * writes are enabled.  In order for occlusion queries to work
       * correctly with no attachments, we need to force-enable PS thread
       * dispatch.
       *
       * The BDW docs are pretty clear that this bit isn't validated
       * and probably shouldn't be used in production:
       *
       *    "This must always be set to Normal. This field should not be
       *    tested for functional validation."
       *
       * Unfortunately, the other mechanism we have for doing this
       * is 3DSTATE_PS_EXTRA::PixelShaderHasUAV which causes hangs on BDW.
       * Given two bad options, we choose the one which works.
       */
      pipeline->force_fragment_thread_dispatch =
         wm_prog_data->has_side_effects ||
         wm_prog_data->uses_kill;
#endif

      wm.BarycentricInterpolationMode =
         elk_wm_prog_data_barycentric_modes(wm_prog_data, 0);

#if GFX_VER < 8
      wm.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;
      wm.PixelShaderUsesSourceDepth    = wm_prog_data->uses_src_depth;
      wm.PixelShaderUsesSourceW        = wm_prog_data->uses_src_w;
      wm.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;

      /* If the subpass has a depth or stencil self-dependency, then we
       * need to force the hardware to do the depth/stencil write *after*
       * fragment shader execution.  Otherwise, the writes may hit memory
       * before we get around to fetching from the input attachment and we
       * may get the depth or stencil value from the current draw rather
       * than the previous one.
       */
      wm.PixelShaderKillsPixel         = state_has_ds_self_dep(state) ||
                                         wm_prog_data->uses_kill ||
                                         wm_prog_data->uses_omask;

      pipeline->force_fragment_thread_dispatch =
         wm.PixelShaderComputedDepthMode != PSCDEPTH_OFF ||
         wm_prog_data->has_side_effects ||
         wm.PixelShaderKillsPixel;

      if (ms != NULL && ms->rasterization_samples > 1) {
         if (elk_wm_prog_data_is_persample(wm_prog_data, 0)) {
            wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
         } else {
            wm.MultisampleDispatchMode = MSDISPMODE_PERPIXEL;
         }
      } else {
         wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
      }
#endif

      wm.LineStippleEnable = rs->line.stipple.enable;
   }

   const struct intel_device_info *devinfo = pipeline->base.device->info;
   uint32_t *dws = devinfo->ver >= 8 ? pipeline->gfx8.wm : pipeline->gfx7.wm;
   GENX(3DSTATE_WM_pack)(NULL, dws, &wm);
}

static void
emit_3dstate_ps(struct anv_graphics_pipeline *pipeline,
                const struct vk_multisample_state *ms,
                const struct vk_color_blend_state *cb)
{
   UNUSED const struct intel_device_info *devinfo =
      pipeline->base.device->info;
   const struct anv_shader_bin *fs_bin =
      pipeline->shaders[MESA_SHADER_FRAGMENT];

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
#if GFX_VER == 7
         /* Even if no fragments are ever dispatched, gfx7 hardware hangs if
          * we don't at least set the maximum number of threads.
          */
         ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
      }
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VER < 8
   /* The hardware wedges if you have this bit set but don't turn on any dual
    * source blend factors.
    */
   bool dual_src_blend = false;
   if (wm_prog_data->dual_src_blend && cb) {
      for (uint32_t i = 0; i < cb->attachment_count; i++) {
         const struct vk_color_blend_attachment_state *a =
            &cb->attachments[i];

         if (a->blend_enable &&
             (is_dual_src_blend_factor(a->src_color_blend_factor) ||
              is_dual_src_blend_factor(a->dst_color_blend_factor) ||
              is_dual_src_blend_factor(a->src_alpha_blend_factor) ||
              is_dual_src_blend_factor(a->dst_alpha_blend_factor))) {
            dual_src_blend = true;
            break;
         }
      }
   }
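
   /* For reference: the dual-source factors tested above correspond to
    * VK_BLEND_FACTOR_SRC1_COLOR/ALPHA and their ONE_MINUS_* variants,
    * which consume the fragment shader's second color output.
    */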
#endif

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS), ps) {
      intel_set_ps_dispatch_state(&ps, devinfo, wm_prog_data,
                                  ms != NULL ? ms->rasterization_samples : 1,
                                  0 /* msaa_flags */);

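      /* General note: KernelStartPointer0/1/2 select among the compiled
       * SIMD variants of the fragment shader; which slot corresponds to
       * which width follows from the dispatch-enable bits programmed by
       * intel_set_ps_dispatch_state() above.
       */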
      ps.KernelStartPointer0 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 0);
      ps.KernelStartPointer1 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 1);
      ps.KernelStartPointer2 = fs_bin->kernel.offset +
                               elk_wm_prog_data_prog_offset(wm_prog_data, ps, 2);

      ps.SingleProgramFlow          = false;
      ps.VectorMaskEnable           = GFX_VER >= 8 &&
                                      wm_prog_data->uses_vmask;
      ps.SamplerCount               = get_sampler_count(fs_bin);
      ps.BindingTableEntryCount     = fs_bin->bind_map.surface_count;
      ps.PushConstantEnable         = wm_prog_data->base.nr_params > 0 ||
                                      wm_prog_data->base.ubo_ranges[0].length;
      ps.PositionXYOffsetSelect     = wm_prog_data->uses_pos_offset ?
                                      POSOFFSET_SAMPLE : POSOFFSET_NONE;
#if GFX_VER < 8
      ps.AttributeEnable            = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget = wm_prog_data->uses_omask;
      ps.DualSourceBlendEnable      = dual_src_blend;
#endif

#if GFX_VERx10 == 75
      /* Haswell requires the sample mask to be set in this packet as well
       * as in 3DSTATE_SAMPLE_MASK; the values should match.
       */
      ps.SampleMask                 = 0xff;
#endif

#if GFX_VER >= 8
      ps.MaximumNumberofThreadsPerPSD =
         devinfo->max_threads_per_psd - (GFX_VER == 8 ? 2 : 1);
#else
      ps.MaximumNumberofThreads        = devinfo->max_wm_threads - 1;
#endif

      ps.DispatchGRFStartRegisterForConstantSetupData0 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0);
      ps.DispatchGRFStartRegisterForConstantSetupData1 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1);
      ps.DispatchGRFStartRegisterForConstantSetupData2 =
         elk_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2);

      ps.PerThreadScratchSpace   = get_scratch_space(fs_bin);
      ps.ScratchSpaceBasePointer =
         get_scratch_address(&pipeline->base, MESA_SHADER_FRAGMENT, fs_bin);
   }
}

#if GFX_VER >= 8
static void
emit_3dstate_ps_extra(struct anv_graphics_pipeline *pipeline,
                      const struct vk_rasterization_state *rs,
                      const struct vk_graphics_pipeline_state *state)
{
   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps);
      return;
   }

   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_PS_EXTRA), ps) {
      ps.PixelShaderValid              = true;
      ps.AttributeEnable               = wm_prog_data->num_varying_inputs > 0;
      ps.oMaskPresenttoRenderTarget    = wm_prog_data->uses_omask;
      ps.PixelShaderIsPerSample        =
         elk_wm_prog_data_is_persample(wm_prog_data, 0);
      ps.PixelShaderComputedDepthMode  = wm_prog_data->computed_depth_mode;
      ps.PixelShaderUsesSourceDepth    = wm_prog_data->uses_src_depth;
      ps.PixelShaderUsesSourceW        = wm_prog_data->uses_src_w;

      /* If the subpass has a depth or stencil self-dependency, then we need
       * to force the hardware to do the depth/stencil write *after* fragment
       * shader execution.  Otherwise, the writes may hit memory before we get
       * around to fetching from the input attachment and we may get the depth
       * or stencil value from the current draw rather than the previous one.
       */
      ps.PixelShaderKillsPixel         = state_has_ds_self_dep(state) ||
                                         wm_prog_data->uses_kill;

      ps.PixelShaderUsesInputCoverageMask = wm_prog_data->uses_sample_mask;
   }
}
#endif

static void
emit_3dstate_vf_statistics(struct anv_graphics_pipeline *pipeline)
{
   anv_batch_emit(&pipeline->base.batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
      vfs.StatisticsEnable = true;
   }
}

static void
compute_kill_pixel(struct anv_graphics_pipeline *pipeline,
                   const struct vk_multisample_state *ms,
                   const struct vk_graphics_pipeline_state *state)
{
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT)) {
      pipeline->kill_pixel = false;
      return;
   }

   const struct elk_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   /* This computes the KillPixel portion of the computation for whether or
    * not we want to enable the PMA fix on gfx8 or gfx9.  It's given by this
    * chunk of the giant formula:
    *
    *    (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *     3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *     3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *     3DSTATE_PS_BLEND::AlphaTestEnable ||
    *     3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable)
    *
    * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable is always false and so is
    * 3DSTATE_PS_BLEND::AlphaTestEnable since Vulkan doesn't have a concept
    * of an alpha test.
    */
   pipeline->kill_pixel =
      state_has_ds_self_dep(state) ||
      wm_prog_data->uses_kill ||
      wm_prog_data->uses_omask ||
      (ms && ms->alpha_to_coverage_enable);
}

void
genX(graphics_pipeline_emit)(struct anv_graphics_pipeline *pipeline,
                             const struct vk_graphics_pipeline_state *state)
{
   enum intel_urb_deref_block_size urb_deref_block_size;
   emit_urb_setup(pipeline, &urb_deref_block_size);

   assert(state->rs != NULL);
   emit_rs_state(pipeline, state->ia, state->rs, state->ms, state->rp,
                 urb_deref_block_size);
   emit_ms_state(pipeline, state->ms);
   emit_cb_state(pipeline, state->cb, state->ms);
   compute_kill_pixel(pipeline, state->ms, state);

   emit_3dstate_clip(pipeline, state->ia, state->vp, state->rs);

#if 0
   /* From gfx7_vs_state.c */

   /**
    * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
    * Geometry > Geometry Shader > State:
    *
    *     "Note: Because of corruption in IVB:GT2, software needs to flush the
    *     whole fixed function pipeline when the GS enable changes value in
    *     the 3DSTATE_GS."
    *
    * The hardware architects have clarified that in this context "flush the
    * whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
    * Stall" bit set.
    */
   if (device->info->platform == INTEL_PLATFORM_IVB)
      gfx7_emit_vs_workaround_flush(elk);
#endif

   emit_vertex_input(pipeline, state->vi);

   emit_3dstate_vs(pipeline);
   emit_3dstate_hs_te_ds(pipeline, state->ts);
   emit_3dstate_gs(pipeline, state->rs);

   emit_3dstate_vf_statistics(pipeline);

   emit_3dstate_streamout(pipeline, state->rs);

   emit_3dstate_sbe(pipeline);
   emit_3dstate_wm(pipeline, state->ia, state->rs,
                   state->ms, state->cb, state);
   emit_3dstate_ps(pipeline, state->ms, state->cb);
#if GFX_VER >= 8
   emit_3dstate_ps_extra(pipeline, state->rs, state);
#endif
}

void
genX(compute_pipeline_emit)(struct anv_compute_pipeline *pipeline)
{
   struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = device->info;
   const struct elk_cs_prog_data *cs_prog_data = get_cs_prog_data(pipeline);

   anv_pipeline_setup_l3_config(&pipeline->base, cs_prog_data->base.total_shared > 0);

   const struct intel_cs_dispatch_info dispatch =
      elk_cs_get_dispatch_info(devinfo, cs_prog_data, NULL);
   const uint32_t vfe_curbe_allocation =
      ALIGN(cs_prog_data->push.per_thread.regs * dispatch.threads +
            cs_prog_data->push.cross_thread.regs, 2);
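   /* Illustrative, with assumed values: 2 per-thread push registers, 8 HW
    * threads, and 3 cross-thread registers give ALIGN(2 * 8 + 3, 2) == 20
    * CURBE registers for the whole thread group.
    */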

   const struct anv_shader_bin *cs_bin = pipeline->cs;

   anv_batch_emit(&pipeline->base.batch, GENX(MEDIA_VFE_STATE), vfe) {
#if GFX_VER > 7
      vfe.StackSize              = 0;
#else
      vfe.GPGPUMode              = true;
#endif
      vfe.MaximumNumberofThreads =
         devinfo->max_cs_threads * devinfo->subslice_total - 1;
      vfe.NumberofURBEntries     = GFX_VER <= 7 ? 0 : 2;
      vfe.ResetGatewayTimer      = true;
      vfe.BypassGatewayControl   = true;
      vfe.URBEntryAllocationSize = GFX_VER <= 7 ? 0 : 2;
      vfe.CURBEAllocationSize    = vfe_curbe_allocation;

      if (cs_bin->prog_data->total_scratch) {
         if (GFX_VER >= 8) {
            /* Broadwell's Per Thread Scratch Space is in the range [0, 11]
             * where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 11;
         } else if (GFX_VERx10 == 75) {
            /* Haswell's Per Thread Scratch Space is in the range [0, 10]
             * where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
             */
            vfe.PerThreadScratchSpace =
               ffs(cs_bin->prog_data->total_scratch) - 12;
         } else {
            /* IVB and BYT use the range [0, 11] to mean [1kB, 12kB]
             * where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
             */
            vfe.PerThreadScratchSpace =
               cs_bin->prog_data->total_scratch / 1024 - 1;
         }
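
         /* A quick sanity check with assumed numbers: total_scratch ==
          * 64kB gives ffs(65536) - 11 == 17 - 11 == 6 on BDW
          * (1kB << 6 == 64kB) and ffs(65536) - 12 == 5 on HSW
          * (2kB << 5 == 64kB).
          */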
         vfe.ScratchSpaceBasePointer =
            get_scratch_address(&pipeline->base, MESA_SHADER_COMPUTE, cs_bin);
      }
   }

   struct GENX(INTERFACE_DESCRIPTOR_DATA) desc = {
      .KernelStartPointer     =
         cs_bin->kernel.offset +
         elk_cs_prog_data_prog_offset(cs_prog_data, dispatch.simd_size),
      .SamplerCount           = get_sampler_count(cs_bin),
      /* We add 1 because the CS indirect parameters buffer isn't accounted
       * for in bind_map.surface_count.
       */
      .BindingTableEntryCount = 1 + MIN2(cs_bin->bind_map.surface_count, 30),
      .BarrierEnable          = cs_prog_data->uses_barrier,
      .SharedLocalMemorySize  =
         elk_encode_slm_size(GFX_VER, cs_prog_data->base.total_shared),

#if GFX_VERx10 != 75
      .ConstantURBEntryReadOffset = 0,
#endif
      .ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
#if GFX_VERx10 >= 75
      .CrossThreadConstantDataReadLength =
         cs_prog_data->push.cross_thread.regs,
#endif

      .NumberofThreadsinGPGPUThreadGroup = dispatch.threads,
   };
   GENX(INTERFACE_DESCRIPTOR_DATA_pack)(NULL,
                                        pipeline->interface_descriptor_data,
                                        &desc);
}