/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_genX_state_brw.h"
#include "common/intel_guardband.h"
#include "common/intel_tiled_render.h"
#include "compiler/brw_prim.h"

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO]                    = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE]                     = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR]               = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR]     = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR]               = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR]     = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA]               = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA]     = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA]               = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA]     = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR]          = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA]          = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE]      = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR]              = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR]    = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA]              = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA]    = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD]                         = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT]                    = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT]            = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN]                         = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX]                         = BLENDFUNCTION_MAX,
};

static const uint32_t vk_to_intel_cullmode[] = {
   [VK_CULL_MODE_NONE]                       = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT]                  = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT]                   = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK]             = CULLMODE_BOTH
};

static const uint32_t vk_to_intel_fillmode[] = {
   [VK_POLYGON_MODE_FILL]                    = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE]                    = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT]                   = FILL_MODE_POINT,
};

static const uint32_t vk_to_intel_front_face[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE]         = 1,
   [VK_FRONT_FACE_CLOCKWISE]                 = 0
};

static const uint32_t vk_to_intel_logic_op[] = {
   [VK_LOGIC_OP_COPY]                        = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR]                       = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND]                         = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE]                 = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED]                = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP]                       = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR]                         = LOGICOP_XOR,
   [VK_LOGIC_OP_OR]                          = LOGICOP_OR,
   [VK_LOGIC_OP_NOR]                         = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT]                  = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT]                      = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE]                  = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED]               = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED]                 = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND]                        = LOGICOP_NAND,
   [VK_LOGIC_OP_SET]                         = LOGICOP_SET,
};

static const uint32_t vk_to_intel_compare_op[] = {
   [VK_COMPARE_OP_NEVER]                        = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS]                         = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL]                        = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL]                = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER]                      = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL]                    = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL]             = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS]                       = PREFILTEROP_ALWAYS,
};

static const uint32_t vk_to_intel_stencil_op[] = {
   [VK_STENCIL_OP_KEEP]                         = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO]                         = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE]                      = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP]          = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP]          = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT]                       = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP]           = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP]           = STENCILOP_DECR,
};

static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
140 
141 static void
genX(streamout_prologue)142 genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
143 {
144 #if INTEL_WA_16013994831_GFX_VER
145    /* Wa_16013994831 - Disable preemption during streamout, enable back
146     * again if XFB not used by the current pipeline.
147     *
148     * Although this workaround applies to Gfx12+, we already disable object
149     * level preemption for another reason in genX_state.c so we can skip this
150     * for Gfx12.
151     */
152    if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
153       return;
154 
155    struct anv_graphics_pipeline *pipeline =
156       anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
157    if (pipeline->uses_xfb) {
158       genX(cmd_buffer_set_preemption)(cmd_buffer, false);
159       return;
160    }
161 
162    if (!cmd_buffer->state.gfx.object_preemption)
163       genX(cmd_buffer_set_preemption)(cmd_buffer, true);
164 #endif
165 }
166 
#if GFX_VER >= 12 && GFX_VER < 30
static uint32_t
get_cps_state_offset(const struct anv_device *device,
                     const struct vk_fragment_shading_rate_state *fsr)
{
   uint32_t offset;
   static const uint32_t size_index[] = {
      [1] = 0,
      [2] = 1,
      [4] = 2,
   };

#if GFX_VERx10 >= 125
   offset =
      1 + /* skip disabled */
      fsr->combiner_ops[0] * 5 * 3 * 3 +
      fsr->combiner_ops[1] * 3 * 3 +
      size_index[fsr->fragment_size.width] * 3 +
      size_index[fsr->fragment_size.height];
#else
   offset =
      1 + /* skip disabled */
      size_index[fsr->fragment_size.width] * 3 +
      size_index[fsr->fragment_size.height];
#endif

   offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
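
   /* Worked example (illustrative, ours): with both combiner ops set to
    * VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR (enum value 0) and a 4x2
    * fragment size, the index computed above on GFX_VERx10 >= 125 is
    * 1 + 0 + 0 + size_index[4] * 3 + size_index[2] = 1 + 6 + 1 = 8. That
    * index is then scaled by the size of one CPS state group (MAX_VIEWPORTS
    * entries of GENX(CPS_STATE_length) dwords, 4 bytes each) to produce a
    * byte offset into the device's pre-generated cps_states buffer.
    */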

   return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 && GFX_VER < 30 */

#if GFX_VER >= 30
static uint32_t
get_cps_size(uint32_t size)
{
   switch (size) {
   case 1:
      return CPSIZE_1;
   case 2:
      return CPSIZE_2;
   case 4:
      return CPSIZE_4;
   default:
      unreachable("Invalid size");
   }
}

static const uint32_t vk_to_intel_shading_rate_combiner_op[] = {
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = CPS_COMB_OP_PASSTHROUGH,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = CPS_COMB_OP_OVERRIDE,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = CPS_COMB_OP_HIGH_QUALITY,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = CPS_COMB_OP_LOW_QUALITY,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = CPS_COMB_OP_RELATIVE,
};
#endif

static bool
has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
{
   return (dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                  VK_IMAGE_ASPECT_STENCIL_BIT)) ||
      dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED ||
      dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED;
}

UNUSED static bool
want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
                     const struct anv_cmd_graphics_state *gfx,
                     const struct vk_depth_stencil_state *ds)
{
   if (GFX_VER > 9)
      return false;
   assert(GFX_VER == 9);

   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
    *
    *    Clearing this bit will force the STC cache to wait for pending
    *    retirement of pixels at the HZ-read stage and do the STC-test for
    *    Non-promoted, R-computed and Computed depth modes instead of
    *    postponing the STC-test to RCPFE.
    *
    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    *
    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    *
    *    COMP_STC_EN = STC_TEST_EN &&
    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
    *
    *    SW parses the pipeline states to generate the following logical
    *    signal indicating if PMA FIX can be enabled.
    *
    *    STC_PMA_OPT =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *       (COMP_STC_EN || STC_WRITE_EN) &&
    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *         3DSTATE_WM::ForceKillPix == ON ||
    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!gfx->hiz_enabled)
      return false;

   /* We can't possibly know if HiZ is enabled without the depth attachment */
   ASSERTED const struct anv_image_view *d_iview = gfx->depth_att.iview;
   assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(gfx->base.pipeline);
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == 2) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    */
   const bool stc_test_en = ds->stencil.test_enable;

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    */
   const bool stc_write_en = ds->stencil.write_enable;

   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;

   /* COMP_STC_EN || STC_WRITE_EN */
   if (!(comp_stc_en || stc_write_en))
      return false;

   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_WM::ForceKillPix == ON ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
    */
   return pipeline->kill_pixel ||
          pipeline->rp_has_ds_self_dep ||
          has_ds_feedback_loop(dyn) ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}

static inline bool
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
                          VkLineRasterizationModeKHR line_mode)
{
   if (raster_mode == VK_POLYGON_MODE_LINE &&
       line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
      return true;
   return false;
}

static inline VkLineRasterizationModeKHR
anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
                            unsigned rasterization_samples)
{
   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
      if (rasterization_samples > 1) {
         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
      } else {
         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
      }
   }
   return line_mode;
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
static inline VkPolygonMode
anv_raster_polygon_mode(const struct anv_graphics_pipeline *pipeline,
                        VkPolygonMode polygon_mode,
                        VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_is_mesh(pipeline)) {
      switch (get_mesh_prog_data(pipeline)->primitive_type) {
      case MESA_PRIM_POINTS:
         return VK_POLYGON_MODE_POINT;
      case MESA_PRIM_LINES:
         return VK_POLYGON_MODE_LINE;
      case MESA_PRIM_TRIANGLES:
         return polygon_mode;
      default:
         unreachable("invalid primitive type for mesh");
      }
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return polygon_mode;
      }
      unreachable("Unsupported TES output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline bool
anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
{
   return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
}

static void
anv_rasterization_mode(VkPolygonMode raster_mode,
                       VkLineRasterizationModeKHR line_mode,
                       float line_width,
                       uint32_t *api_mode,
                       bool *msaa_rasterization_enable)
{
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful.  Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which index a
       * table that is expected to magically put the hardware into the right
       * mode for your API.  Sadly, Vulkan isn't any of the APIs the hardware
       * people thought of, so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
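      /* Summary of the mapping chosen below (our reading of the PRM table):
       *   RECTANGULAR line mode        -> DX101, MSAA rasterization enabled
       *                                   (disabled for wide lines on gfx9,
       *                                    which draws parallelograms instead)
       *   BRESENHAM / SMOOTH line mode -> DX9OGL, MSAA rasterization disabled
       *   non-line polygon modes       -> DX101, MSAA rasterization enabled
       */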
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX101;
#if GFX_VER <= 9
         /* Prior to ICL, the algorithm the HW uses to draw wide lines
          * doesn't quite match what the CTS expects, at least for rectangular
          * lines, so we set this to false here, making it draw parallelograms
          * instead, which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
#else
         *msaa_rasterization_enable = true;
#endif
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX101;
      *msaa_rasterization_enable = true;
   }
}

static bool
is_src1_blend_factor(enum GENX(3D_Color_Buffer_Blend_Factor) factor)
{
   return factor == BLENDFACTOR_SRC1_COLOR ||
          factor == BLENDFACTOR_SRC1_ALPHA ||
          factor == BLENDFACTOR_INV_SRC1_COLOR ||
          factor == BLENDFACTOR_INV_SRC1_ALPHA;
}

#if GFX_VERx10 == 125
/**
 * Return the dimensions of the current rendering area, defined as the
 * bounding box of all present color, depth and stencil attachments.
 */
UNUSED static bool
calculate_render_area(const struct anv_cmd_graphics_state *gfx,
                      unsigned *width, unsigned *height)
{
   *width = gfx->render_area.offset.x + gfx->render_area.extent.width;
   *height = gfx->render_area.offset.y + gfx->render_area.extent.height;

   for (unsigned i = 0; i < gfx->color_att_count; i++) {
      const struct anv_attachment *att = &gfx->color_att[i];
      if (att->iview) {
         *width = MAX2(*width, att->iview->vk.extent.width);
         *height = MAX2(*height, att->iview->vk.extent.height);
      }
   }

   const struct anv_image_view *const z_view = gfx->depth_att.iview;
   if (z_view) {
      *width = MAX2(*width, z_view->vk.extent.width);
      *height = MAX2(*height, z_view->vk.extent.height);
   }

   const struct anv_image_view *const s_view = gfx->stencil_att.iview;
   if (s_view) {
      *width = MAX2(*width, s_view->vk.extent.width);
      *height = MAX2(*height, s_view->vk.extent.height);
   }

   return *width && *height;
}

/* Calculate TBIMR tiling parameters adequate for the current pipeline
 * setup.  Return true if TBIMR should be enabled.
 */
UNUSED static bool
calculate_tile_dimensions(const struct anv_device *device,
                          const struct anv_cmd_graphics_state *gfx,
                          const struct intel_l3_config *l3_config,
                          unsigned fb_width, unsigned fb_height,
                          unsigned *tile_width, unsigned *tile_height)
{
   assert(GFX_VER == 12);
   const unsigned aux_scale = ISL_MAIN_TO_CCS_SIZE_RATIO_XE;

   unsigned pixel_size = 0;

   /* Perform a rough calculation of the tile cache footprint of the
    * pixel pipeline, approximating it as the sum of the amount of
    * memory used per pixel by every render target, depth, stencil and
    * auxiliary surfaces bound to the pipeline.
    */
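   /* Illustrative example (numbers are ours, not from the PRM): a single
    * RGBA8 color target contributes 4 bytes per pixel from its primary
    * surface, plus DIV_ROUND_UP(4, aux_scale) = 1 more byte for its CCS
    * auxiliary surface; the depth and stencil attachments handled below add
    * their own per-pixel footprints the same way.
    */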
   for (uint32_t i = 0; i < gfx->color_att_count; i++) {
      const struct anv_attachment *att = &gfx->color_att[i];

      if (att->iview) {
         const struct anv_image *image = att->iview->image;
         const unsigned p = anv_image_aspect_to_plane(image,
                                                      VK_IMAGE_ASPECT_COLOR_BIT);
         const struct anv_image_plane *plane = &image->planes[p];

         pixel_size += intel_calculate_surface_pixel_size(
            &plane->primary_surface.isl);

         if (isl_aux_usage_has_mcs(att->aux_usage))
            pixel_size += intel_calculate_surface_pixel_size(
               &plane->aux_surface.isl);

         if (isl_aux_usage_has_ccs(att->aux_usage))
            pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
                                          &plane->primary_surface.isl),
                                       aux_scale);
      }
   }

   const struct anv_image_view *const z_view = gfx->depth_att.iview;
   if (z_view) {
      const struct anv_image *image = z_view->image;
      assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
      const unsigned p = anv_image_aspect_to_plane(image,
                                                   VK_IMAGE_ASPECT_DEPTH_BIT);
      const struct anv_image_plane *plane = &image->planes[p];

      pixel_size += intel_calculate_surface_pixel_size(
         &plane->primary_surface.isl);

      if (isl_aux_usage_has_hiz(image->planes[p].aux_usage))
         pixel_size += intel_calculate_surface_pixel_size(
            &plane->aux_surface.isl);

      if (isl_aux_usage_has_ccs(image->planes[p].aux_usage))
         pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
                                       &plane->primary_surface.isl),
                                    aux_scale);
   }

   const struct anv_image_view *const s_view = gfx->stencil_att.iview;
   if (s_view && s_view != z_view) {
      const struct anv_image *image = s_view->image;
      assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
      const unsigned p = anv_image_aspect_to_plane(image,
                                                   VK_IMAGE_ASPECT_STENCIL_BIT);
      const struct anv_image_plane *plane = &image->planes[p];

      pixel_size += intel_calculate_surface_pixel_size(
         &plane->primary_surface.isl);
   }

   if (!pixel_size)
      return false;

   /* Compute a tile layout that allows reasonable utilization of the
    * tile cache based on the per-pixel cache footprint estimated
    * above.
    */
   intel_calculate_tile_dimensions(device->info, l3_config,
                                   32, 32, fb_width, fb_height,
                                   pixel_size, tile_width, tile_height);

   /* Perform TBIMR tile passes only if the framebuffer covers more
    * than a single tile.
    */
   return *tile_width < fb_width || *tile_height < fb_height;
}
#endif

#define GET(field) hw_state->field
#define SET(bit, field, value)                               \
   do {                                                      \
      __typeof(hw_state->field) __v = value;                 \
      if (hw_state->field != __v) {                          \
         hw_state->field = __v;                              \
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);   \
      }                                                      \
   } while (0)
#define SET_STAGE(bit, field, value, stage)                  \
   do {                                                      \
      __typeof(hw_state->field) __v = value;                 \
      if (!anv_pipeline_has_stage(pipeline,                  \
                                  MESA_SHADER_##stage)) {    \
         hw_state->field = __v;                              \
         break;                                              \
      }                                                      \
      if (hw_state->field != __v) {                          \
         hw_state->field = __v;                              \
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);   \
      }                                                      \
   } while (0)
#define SETUP_PROVOKING_VERTEX(bit, cmd, mode)                         \
   switch (mode) {                                                     \
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:                     \
      SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0);         \
      SET(bit, cmd.LineStripListProvokingVertexSelect,     0);         \
      SET(bit, cmd.TriangleFanProvokingVertexSelect,       1);         \
      break;                                                           \
   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:                      \
      SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2);         \
      SET(bit, cmd.LineStripListProvokingVertexSelect,     1);         \
      SET(bit, cmd.TriangleFanProvokingVertexSelect,       2);         \
      break;                                                           \
   default:                                                            \
      unreachable("Invalid provoking vertex mode");                    \
   }

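/* Usage sketch (illustrative): the update helpers below run with `hw_state`
 * and `pipeline` in scope, so for example
 *
 *    SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]);
 *
 * only writes hw_state->raster.CullMode and marks ANV_GFX_STATE_RASTER dirty
 * when the value actually changes, which makes unchanged dynamic state free
 * at emission time. SET_STAGE() additionally skips the dirty marking when
 * the pipeline lacks the given shader stage.
 */
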
ALWAYS_INLINE static void
update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state,
                     const struct vk_dynamic_graphics_state *dyn,
                     const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data)
      return;

   /* If we have any dynamic bits here, we might need to update the value
    * in the push constant for the shader.
    */
   if (wm_prog_data->coarse_pixel_dispatch != INTEL_SOMETIMES &&
       wm_prog_data->persample_dispatch != INTEL_SOMETIMES &&
       wm_prog_data->alpha_to_coverage != INTEL_SOMETIMES)
      return;

   enum intel_msaa_flags fs_msaa_flags = INTEL_MSAA_FLAG_ENABLE_DYNAMIC;

   if (dyn->ms.rasterization_samples > 1) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_MULTISAMPLE_FBO;

      if (wm_prog_data->sample_shading) {
         assert(wm_prog_data->persample_dispatch != INTEL_NEVER);
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH;
      }
      if ((pipeline->sample_shading_enable &&
           (pipeline->min_sample_shading * dyn->ms.rasterization_samples) > 1) ||
          wm_prog_data->sample_shading) {
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
                          INTEL_MSAA_FLAG_PERSAMPLE_INTERP;
      }
   }

   if (wm_prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES &&
       !(fs_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH)) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_COARSE_PI_MSG |
                       INTEL_MSAA_FLAG_COARSE_RT_WRITES;
   }

   if (dyn->ms.alpha_to_coverage_enable)
      fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;

   SET(FS_MSAA_FLAGS, fs_msaa_flags, fs_msaa_flags);
}

ALWAYS_INLINE static void
update_ps(struct anv_gfx_dynamic_state *hw_state,
          const struct anv_device *device,
          const struct vk_dynamic_graphics_state *dyn,
          const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data) {
#if GFX_VER < 20
      SET(PS, ps._8PixelDispatchEnable,  false);
      SET(PS, ps._16PixelDispatchEnable, false);
      SET(PS, ps._32PixelDispatchEnable, false);
#else
      SET(PS, ps.Kernel0Enable, false);
      SET(PS, ps.Kernel1Enable, false);
#endif
      return;
   }

   const struct anv_shader_bin *fs_bin =
      pipeline->base.shaders[MESA_SHADER_FRAGMENT];
   struct GENX(3DSTATE_PS) ps = {};
   intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
                               MAX2(dyn->ms.rasterization_samples, 1),
                               hw_state->fs_msaa_flags);

   SET(PS, ps.KernelStartPointer0,
           fs_bin->kernel.offset +
           brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
   SET(PS, ps.KernelStartPointer1,
           fs_bin->kernel.offset +
           brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
   SET(PS, ps.KernelStartPointer2,
           fs_bin->kernel.offset +
           brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif

   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
#if GFX_VER < 20
   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
           brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
#endif

#if GFX_VER < 20
   SET(PS, ps._8PixelDispatchEnable,  ps._8PixelDispatchEnable);
   SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
   SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
#else
   SET(PS, ps.Kernel0Enable,            ps.Kernel0Enable);
   SET(PS, ps.Kernel1Enable,            ps.Kernel1Enable);
   SET(PS, ps.Kernel0SIMDWidth,         ps.Kernel0SIMDWidth);
   SET(PS, ps.Kernel1SIMDWidth,         ps.Kernel1SIMDWidth);
   SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
   SET(PS, ps.Kernel0MaximumPolysperThread, ps.Kernel0MaximumPolysperThread);
#endif

   SET(PS, ps.PositionXYOffsetSelect,
           !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
           brw_wm_prog_data_is_persample(wm_prog_data,
                                         hw_state->fs_msaa_flags) ?
           POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
}

ALWAYS_INLINE static void
update_ps_extra_wm(struct anv_gfx_dynamic_state *hw_state,
                   const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data)
      return;

   SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
                 brw_wm_prog_data_is_persample(wm_prog_data,
                                               hw_state->fs_msaa_flags));
#if GFX_VER >= 11
   const bool uses_coarse_pixel =
      brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags);
   SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel);
#endif
#if GFX_VERx10 >= 125
   /* TODO: We should only require this when the last geometry shader uses a
    *       fragment shading rate that is not constant.
    */
   SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, uses_coarse_pixel);
#endif

   SET(WM, wm.BarycentricInterpolationMode,
           wm_prog_data_barycentric_modes(wm_prog_data, hw_state->fs_msaa_flags));
}

ALWAYS_INLINE static void
update_ps_extra_has_uav(struct anv_gfx_dynamic_state *hw_state,
                        const struct anv_cmd_graphics_state *gfx,
                        const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VERx10 >= 125
   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
                       wm_prog_data && wm_prog_data->has_side_effects,
                       FRAGMENT);
#else
   /* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even if
    * 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
    * 3DSTATE_PS_BLEND::HasWriteableRT=false. This causes problems with
    * occlusion queries with 0 attachments. There are no CTS tests exercising
    * this, but zink+anv fails a bunch of tests like piglit
    * arb_framebuffer_no_attachments-query.
    *
    * Here we choose to tweak PixelShaderHasUAV to make sure the fragment
    * shaders are run properly.
    */
   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
                       wm_prog_data && (wm_prog_data->has_side_effects ||
                                        (gfx->color_att_count == 0 &&
                                         gfx->n_occlusion_queries > 0)),
                       FRAGMENT);
#endif
}

ALWAYS_INLINE static void
update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
                            const struct vk_dynamic_graphics_state *dyn,
                            const struct anv_cmd_graphics_state *gfx,
                            const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
                       wm_prog_data && (pipeline->rp_has_ds_self_dep ||
                                        has_ds_feedback_loop(dyn) ||
                                        wm_prog_data->uses_kill),
                       FRAGMENT);
}

#if GFX_VERx10 >= 125
ALWAYS_INLINE static void
update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state,
                          const struct vk_dynamic_graphics_state *dyn)
{
   SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
}
#endif

ALWAYS_INLINE static void
update_streamout(struct anv_gfx_dynamic_state *hw_state,
                 const struct vk_dynamic_graphics_state *dyn,
                 const struct anv_cmd_graphics_state *gfx,
                 const struct anv_graphics_pipeline *pipeline)
{
   SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
   SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);

#if INTEL_NEEDS_WA_18022508906
   /* Wa_18022508906:
    *
    * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
    *
    * SOL_INT::Render_Enable =
    *   (3DSTATE_STREAMOUT::Force_Rendering == Force_On) ||
    *   (
    *     (3DSTATE_STREAMOUT::Force_Rendering != Force_Off) &&
    *     !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
    *     !3DSTATE_STREAMOUT::API_Render_Disable &&
    *     (
    *       3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
    *       3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
    *       3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
    *       3DSTATE_PS_EXTRA::PS_Valid ||
    *       3DSTATE_WM::Legacy Depth_Buffer_Clear ||
    *       3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
    *       3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
    *     )
    *   )
    *
    * If SOL_INT::Render_Enable is false, the SO stage will not forward any
    * topologies down the pipeline, which is not what we want for occlusion
    * queries.
    *
    * Here we force rendering to get SOL_INT::Render_Enable when occlusion
    * queries are active.
    */
   SET(STREAMOUT, so.ForceRendering,
       (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
       Force_on : 0);
#endif
}

ALWAYS_INLINE static void
update_provoking_vertex(struct anv_gfx_dynamic_state *hw_state,
                        const struct vk_dynamic_graphics_state *dyn,
                        const struct anv_graphics_pipeline *pipeline)
{
   SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
   SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);

   switch (dyn->rs.provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      SET(STREAMOUT, so.ReorderMode, LEADING);
      SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      SET(STREAMOUT, so.ReorderMode, TRAILING);
      SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }
}

ALWAYS_INLINE static void
update_topology(struct anv_gfx_dynamic_state *hw_state,
                const struct vk_dynamic_graphics_state *dyn,
                const struct anv_graphics_pipeline *pipeline)
{
   uint32_t topology =
      anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ?
      _3DPRIM_PATCHLIST(dyn->ts.patch_control_points) :
      vk_to_intel_primitive_type[dyn->ia.primitive_topology];

   SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
}

#if GFX_VER >= 11
ALWAYS_INLINE static void
update_cps(struct anv_gfx_dynamic_state *hw_state,
           const struct anv_device *device,
           const struct vk_dynamic_graphics_state *dyn,
           const struct anv_graphics_pipeline *pipeline)
{
#if GFX_VER >= 30
   SET(COARSE_PIXEL, coarse_pixel.CPSizeX,
       get_cps_size(dyn->fsr.fragment_size.width));
   SET(COARSE_PIXEL, coarse_pixel.CPSizeY,
       get_cps_size(dyn->fsr.fragment_size.height));
   SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner0Opcode,
       vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[0]]);
   SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner1Opcode,
       vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[1]]);
#elif GFX_VER >= 12
   SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
       get_cps_state_offset(device, &dyn->fsr));
#else
   STATIC_ASSERT(GFX_VER == 11);
   SET(CPS, cps.CoarsePixelShadingMode, CPS_MODE_CONSTANT);
   SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
   SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
#endif
}
#endif

ALWAYS_INLINE static void
update_te(struct anv_gfx_dynamic_state *hw_state,
          const struct vk_dynamic_graphics_state *dyn,
          const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         SET(TE, te.OutputTopology, tes_prog_data->output_topology);
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
         } else {
            SET(TE, te.OutputTopology, tes_prog_data->output_topology);
         }
      }
   } else {
      SET(TE, te.OutputTopology, OUTPUT_POINT);
   }
}

ALWAYS_INLINE static void
update_line_width(struct anv_gfx_dynamic_state *hw_state,
                  const struct vk_dynamic_graphics_state *dyn)
{
   SET(SF, sf.LineWidth, dyn->rs.line.width);
}

ALWAYS_INLINE static void
update_sf_global_depth_bias(struct anv_gfx_dynamic_state *hw_state,
                            const struct vk_dynamic_graphics_state *dyn)
{
   /**
    * From the Vulkan Spec:
    *
    *    "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth bias
    *     representation is a factor of constant r equal to 1."
    *
    * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
    *
    *    "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
    *
    *     Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
    *
    *     Where r is the minimum representable value > 0 in the depth buffer
    *     format, converted to float32 (note: If state bit Legacy Global Depth
    *     Bias Enable is set, the r term will be forced to 1.0)"
    *
    * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
    * LegacyGlobalDepthBiasEnable.
    */
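   /* Worked example (ours, not from the PRM): with a D16_UNORM depth buffer,
    * r is roughly 2^-16, so a depth_bias.constant_factor of 4 contributes
    * about 4 * 2^-16 to the bias. With the FLOAT representation,
    * LegacyGlobalDepthBiasEnable forces r to 1.0 and the same factor
    * contributes 4.0 directly.
    */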
   SET(SF, sf.LegacyGlobalDepthBiasEnable,
           dyn->rs.depth_bias.representation ==
           VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
}

ALWAYS_INLINE static void
update_clip_api_mode(struct anv_gfx_dynamic_state *hw_state,
                     const struct vk_dynamic_graphics_state *dyn)
{
   SET(CLIP, clip.APIMode,
             dyn->vp.depth_clip_negative_one_to_one ?
             APIMODE_OGL : APIMODE_D3D);
}

ALWAYS_INLINE static void
update_clip_max_viewport(struct anv_gfx_dynamic_state *hw_state,
                         const struct vk_dynamic_graphics_state *dyn)
{
   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *     interface does not include a variable decorated with ViewportIndex,
    *     then the first viewport is used."
    *
    * This could mean that we might need to set the MaximumVPIndex based on
    * the pipeline's last stage, but if the last shader doesn't write the
    * viewport index and the VUE header is used, the compiler will force the
    * value to 0 (which is what the spec requires above). Otherwise it seems
    * like the HW should be pulling 0 if the VUE header is not present.
    *
    * Avoiding a check on the pipeline seems to prevent additional emissions
    * of 3DSTATE_CLIP which appear to impact performance on Assassin's Creed
    * Valhalla.
    */
   SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
                                  dyn->vp.viewport_count - 1 : 0);
}

ALWAYS_INLINE static void
update_clip_raster(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn,
                   const struct anv_cmd_graphics_state *gfx,
                   const struct anv_graphics_pipeline *pipeline)
{
   /* Take the dynamic primitive topology into account with
    *    3DSTATE_RASTER::APIMode
    *    3DSTATE_RASTER::DXMultisampleRasterizationEnable
    *    3DSTATE_RASTER::AntialiasingEnable
    */
   uint32_t api_mode = 0;
   bool msaa_raster_enable = false;

   const VkLineRasterizationModeKHR line_mode =
      anv_line_rasterization_mode(dyn->rs.line.mode,
                                  dyn->ms.rasterization_samples);

   const VkPolygonMode dynamic_raster_mode =
      anv_raster_polygon_mode(pipeline,
                              dyn->rs.polygon_mode,
                              dyn->ia.primitive_topology);

   anv_rasterization_mode(dynamic_raster_mode,
                          line_mode, dyn->rs.line.width,
                          &api_mode, &msaa_raster_enable);

   /* From the Broadwell PRM, Volume 2, documentation for 3DSTATE_RASTER,
    * "Antialiasing Enable":
    *
    * "This field must be disabled if any of the render targets have integer
    * (UINT or SINT) surface format."
    *
    * Additionally internal documentation for Gfx12+ states:
    *
    * "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
    *  FORCED_SAMPLE_COUNT > 1."
    */
   const bool aa_enable =
      anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
      !gfx->has_uint_rt &&
      !(GFX_VER >= 12 && gfx->samples > 1);

   const bool depth_clip_enable =
      vk_rasterization_state_depth_clip_enable(&dyn->rs);

   const bool xy_clip_test_enable =
      (dynamic_raster_mode == VK_POLYGON_MODE_FILL);

   SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);

   SET(RASTER, raster.APIMode, api_mode);
   SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
   SET(RASTER, raster.AntialiasingEnable, aa_enable);
   SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]);
   SET(RASTER, raster.FrontWinding, vk_to_intel_front_face[dyn->rs.front_face]);
   SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant_factor);
   SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope_factor);
   SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
   SET(RASTER, raster.FrontFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
   SET(RASTER, raster.BackFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
   SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
   SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
   SET(RASTER, raster.ConservativeRasterizationEnable,
               dyn->rs.conservative_mode !=
               VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
}

ALWAYS_INLINE static void
update_multisample(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn)
{
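   /* NumberofMultisamples is log2 of the sample count: __builtin_ffs()
    * returns the 1-based index of the lowest set bit, so for the
    * power-of-two counts Vulkan allows, ffs(1) - 1 = 0, ffs(4) - 1 = 2,
    * and ffs(16) - 1 = 4.
    */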
1171    SET(MULTISAMPLE, ms.NumberofMultisamples,
1172                     __builtin_ffs(MAX2(dyn->ms.rasterization_samples, 1)) - 1);
1173 }
1174 
1175 ALWAYS_INLINE static void
update_sample_mask(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1176 update_sample_mask(struct anv_gfx_dynamic_state *hw_state,
1177                    const struct vk_dynamic_graphics_state *dyn)
1178 {
1179    /* From the Vulkan 1.0 spec:
1180     *    If pSampleMask is NULL, it is treated as if the mask has all bits
1181     *    enabled, i.e. no coverage is removed from fragments.
1182     *
1183     * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
1184     */
1185    SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
1186 }
1187 
1188 ALWAYS_INLINE static void
update_wm_depth_stencil(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_device * device)1189 update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state,
1190                         const struct vk_dynamic_graphics_state *dyn,
1191                         const struct anv_cmd_graphics_state *gfx,
1192                         const struct anv_device *device)
1193 {
1194    VkImageAspectFlags ds_aspects = 0;
1195    if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED)
1196       ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1197    if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED)
1198       ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1199 
1200    struct vk_depth_stencil_state opt_ds = dyn->ds;
1201    vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
1202 
1203    SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
1204 
1205    SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
1206        opt_ds.stencil.front.compare_mask & 0xff);
1207    SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
1208        opt_ds.stencil.front.write_mask & 0xff);
1209 
1210    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
1211    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
1212 
1213    SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
1214        opt_ds.stencil.front.reference & 0xff);
1215    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
1216        opt_ds.stencil.back.reference & 0xff);
1217 
1218    SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
1219    SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
1220    SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
1221                          vk_to_intel_compare_op[opt_ds.depth.compare_op]);
1222    SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
1223    SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable,
1224                          opt_ds.stencil.write_enable);
1225    SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
1226                          vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]);
1227    SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
1228                          vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]);
1229    SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
1230                          vk_to_intel_stencil_op[
1231                             opt_ds.stencil.front.op.depth_fail]);
1232    SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
1233                          vk_to_intel_compare_op[
1234                             opt_ds.stencil.front.op.compare]);
1235    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
1236                          vk_to_intel_stencil_op[
1237                             opt_ds.stencil.back.op.fail]);
1238    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
1239                          vk_to_intel_stencil_op[
1240                             opt_ds.stencil.back.op.pass]);
1241    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
1242                          vk_to_intel_stencil_op[
1243                             opt_ds.stencil.back.op.depth_fail]);
1244    SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
1245                          vk_to_intel_compare_op[
1246                             opt_ds.stencil.back.op.compare]);
1247 
1248 #if GFX_VER == 9
1249    const bool pma = want_stencil_pma_fix(dyn, gfx, &opt_ds);
1250    SET(PMA_FIX, pma_fix, pma);
1251 #endif
1252 
1253 #if INTEL_WA_18019816803_GFX_VER
1254    if (intel_needs_workaround(device->info, 18019816803)) {
1255       bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
1256       SET(WA_18019816803, ds_write_state, ds_write_state);
1257    }
1258 #endif
1259 }
1260 
1261 ALWAYS_INLINE static void
1262 update_depth_bounds(struct anv_gfx_dynamic_state *hw_state,
1263                     const struct vk_dynamic_graphics_state *dyn)
1264 {
1265    SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
1266    /* Only look at updating the bounds if testing is enabled */
1267    if (dyn->ds.depth.bounds_test.enable) {
1268       SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
1269       SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
1270    }
1271 }
1272 
1273 ALWAYS_INLINE static void
1274 update_line_stipple(struct anv_gfx_dynamic_state *hw_state,
1275                     const struct vk_dynamic_graphics_state *dyn)
1276 {
1277    SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
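   /* The inverse repeat count is the reciprocal of the factor (clamped to a
    * minimum of 1 by the MAX2), e.g. a factor of 4 yields 0.25.
    */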
1278    SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
1279                      1.0f / MAX2(1, dyn->rs.line.stipple.factor));
1280    SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);
1281 
1282    SET(WM,           wm.LineStippleEnable, dyn->rs.line.stipple.enable);
1283 }
1284 
1285 ALWAYS_INLINE static void
1286 update_vf_restart(struct anv_gfx_dynamic_state *hw_state,
1287                   const struct vk_dynamic_graphics_state *dyn,
1288                   const struct anv_cmd_graphics_state *gfx)
1289 {
1290    SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
1291    SET(VF, vf.CutIndex, gfx->restart_index);
1292 }
1293 
1294 ALWAYS_INLINE static void
1295 update_blend_state(struct anv_gfx_dynamic_state *hw_state,
1296                    const struct vk_dynamic_graphics_state *dyn,
1297                    struct anv_cmd_graphics_state *gfx,
1298                    const struct anv_device *device,
1299                    bool has_fs_stage,
1300                    bool has_fs_dual_src)
1301 {
1302    const struct anv_instance *instance = device->physical->instance;
1303    const uint8_t color_writes = dyn->cb.color_write_enables;
1304    bool has_writeable_rt =
1305       has_fs_stage &&
1306       !anv_gfx_all_color_write_masked(gfx, dyn);
1307 
1308    SET(BLEND_STATE, blend.AlphaToCoverageEnable,
1309                     dyn->ms.alpha_to_coverage_enable);
1310    SET(BLEND_STATE, blend.AlphaToOneEnable,
1311                     dyn->ms.alpha_to_one_enable);
1312    SET(BLEND_STATE, blend.ColorDitherEnable,
1313                     gfx->rendering_flags &
1314                     VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT);
1315 
1316    bool independent_alpha_blend = false;
1317    /* Wa_14018912822, check if we set these during RT setup. */
1318    bool color_blend_zero = false;
1319    bool alpha_blend_zero = false;
1320    uint32_t rt_0 = MESA_VK_ATTACHMENT_UNUSED;
1321    for (uint32_t rt = 0; rt < MAX_RTS; rt++) {
1322       if (gfx->color_output_mapping[rt] >= gfx->color_att_count)
1323          continue;
1324 
1325       uint32_t att = gfx->color_output_mapping[rt];
1326       if (att == 0)
1327          rt_0 = rt;
1328 
1329       /* Disable all channel writes when color writes are disabled for
1330        * this attachment. */
1330       bool write_disabled = (color_writes & BITFIELD_BIT(att)) == 0;
1331 
1332       SET(BLEND_STATE, blend.rts[rt].WriteDisableAlpha,
1333                        write_disabled ||
1334                        (dyn->cb.attachments[att].write_mask &
1335                         VK_COLOR_COMPONENT_A_BIT) == 0);
1336       SET(BLEND_STATE, blend.rts[rt].WriteDisableRed,
1337                        write_disabled ||
1338                        (dyn->cb.attachments[att].write_mask &
1339                         VK_COLOR_COMPONENT_R_BIT) == 0);
1340       SET(BLEND_STATE, blend.rts[rt].WriteDisableGreen,
1341                        write_disabled ||
1342                        (dyn->cb.attachments[att].write_mask &
1343                         VK_COLOR_COMPONENT_G_BIT) == 0);
1344       SET(BLEND_STATE, blend.rts[rt].WriteDisableBlue,
1345                        write_disabled ||
1346                        (dyn->cb.attachments[att].write_mask &
1347                         VK_COLOR_COMPONENT_B_BIT) == 0);
1348       /* Vulkan specification 1.2.168, VkLogicOp:
1349        *
1350        *   "Logical operations are controlled by the logicOpEnable and logicOp
1351        *   members of VkPipelineColorBlendStateCreateInfo. If logicOpEnable is
1352        *   VK_TRUE, then a logical operation selected by logicOp is applied
1353        *   between each color attachment and the fragment’s corresponding
1354        *   output value, and blending of all attachments is treated as if it
1355        *   were disabled."
1356        *
1357        * From the Broadwell PRM Volume 2d: Command Reference: Structures:
1358        * BLEND_STATE_ENTRY:
1359        *
1360        *   "Enabling LogicOp and Color Buffer Blending at the same time is
1361        *   UNDEFINED"
1362        *
1363        * The Vulkan spec also says:
1364        *   "Logical operations are not applied to floating-point or sRGB format
1365        *   color attachments."
1366        * and
1367        *   "Any attachments using color formats for which logical operations
1368        *   are not supported simply pass through the color values unmodified."
1369        */
1370       bool ignores_logic_op =
1371          vk_format_is_float(gfx->color_att[att].vk_format) ||
1372          vk_format_is_srgb(gfx->color_att[att].vk_format);
1373       SET(BLEND_STATE, blend.rts[rt].LogicOpFunction,
1374                        vk_to_intel_logic_op[dyn->cb.logic_op]);
1375       SET(BLEND_STATE, blend.rts[rt].LogicOpEnable,
1376                        dyn->cb.logic_op_enable && !ignores_logic_op);
1377 
1378       SET(BLEND_STATE, blend.rts[rt].ColorClampRange, COLORCLAMP_RTFORMAT);
1379       SET(BLEND_STATE, blend.rts[rt].PreBlendColorClampEnable, true);
1380       SET(BLEND_STATE, blend.rts[rt].PostBlendColorClampEnable, true);
1381 
1382       /* Setup blend equation. */
1383       SET(BLEND_STATE, blend.rts[rt].ColorBlendFunction,
1384                        vk_to_intel_blend_op[
1385                           dyn->cb.attachments[att].color_blend_op]);
1386       SET(BLEND_STATE, blend.rts[rt].AlphaBlendFunction,
1387                        vk_to_intel_blend_op[
1388                           dyn->cb.attachments[att].alpha_blend_op]);
1389 
1390       if (dyn->cb.attachments[att].src_color_blend_factor !=
1391           dyn->cb.attachments[att].src_alpha_blend_factor ||
1392           dyn->cb.attachments[att].dst_color_blend_factor !=
1393           dyn->cb.attachments[att].dst_alpha_blend_factor ||
1394           dyn->cb.attachments[att].color_blend_op !=
1395           dyn->cb.attachments[att].alpha_blend_op)
1396          independent_alpha_blend = true;
1397 
1398       /* The Dual Source Blending documentation says:
1399        *
1400        * "If SRC1 is included in a src/dst blend factor and a DualSource RT
1401        * Write message is not used, results are UNDEFINED. (This reflects the
1402        * same restriction in DX APIs, where undefined results are produced if
1403        * “o1” is not written by a PS – there are no default values defined)."
1404        *
1405        * There is no way to gracefully fix this undefined situation so we just
1406        * disable the blending to prevent possible issues.
1407        */
1408       if (has_fs_stage && !has_fs_dual_src &&
1409           anv_is_dual_src_blend_equation(&dyn->cb.attachments[att])) {
1410          SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, false);
1411       } else {
1412          SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable,
1413                           !dyn->cb.logic_op_enable &&
1414                           dyn->cb.attachments[att].blend_enable);
1415       }
1416 
1417       /* Our hardware applies the blend factor prior to the blend function
1418        * regardless of what function is used. Technically, this means the
1419        * hardware can do MORE than GL or Vulkan specify. However, it also
1420        * means that, for MIN and MAX, we have to stomp the blend factor to ONE
1421        * to make it a no-op.
1422        */
1423       uint32_t SourceBlendFactor;
1424       uint32_t DestinationBlendFactor;
1425       uint32_t SourceAlphaBlendFactor;
1426       uint32_t DestinationAlphaBlendFactor;
1427       if (dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MIN ||
1428           dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MAX) {
1429          SourceBlendFactor = BLENDFACTOR_ONE;
1430          DestinationBlendFactor = BLENDFACTOR_ONE;
1431       } else {
1432          SourceBlendFactor = vk_to_intel_blend[
1433             dyn->cb.attachments[att].src_color_blend_factor];
1434          DestinationBlendFactor = vk_to_intel_blend[
1435             dyn->cb.attachments[att].dst_color_blend_factor];
1436       }
1437 
1438       if (dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MIN ||
1439           dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MAX) {
1440          SourceAlphaBlendFactor = BLENDFACTOR_ONE;
1441          DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
1442       } else {
1443          SourceAlphaBlendFactor = vk_to_intel_blend[
1444             dyn->cb.attachments[att].src_alpha_blend_factor];
1445          DestinationAlphaBlendFactor = vk_to_intel_blend[
1446             dyn->cb.attachments[att].dst_alpha_blend_factor];
1447       }
1448 
1449       /* Replace any Src1 blend factor with 1.0 (BLENDFACTOR_ONE) if dual
1450        * source blending is not enabled.
1451        */
1452       if (has_fs_stage && !has_fs_dual_src) {
1453          if (is_src1_blend_factor(SourceBlendFactor))
1454             SourceBlendFactor = BLENDFACTOR_ONE;
1455          if (is_src1_blend_factor(DestinationBlendFactor))
1456             DestinationBlendFactor = BLENDFACTOR_ONE;
1457       }
1458 
1459       if (instance->intel_enable_wa_14018912822 &&
1460           intel_needs_workaround(device->info, 14018912822) &&
1461           dyn->ms.rasterization_samples > 1) {
1462          if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
1463             DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
1464             color_blend_zero = true;
1465          }
1466          if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
1467             DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
1468             alpha_blend_zero = true;
1469          }
1470       }
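      /* The CONST_COLOR/CONST_ALPHA factors substituted here still act as
       * ZERO because update_blend_constants() programs the blend constants
       * to 0.0f whenever color_blend_zero/alpha_blend_zero is set.
       */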
1471 
1472       SET(BLEND_STATE, blend.rts[rt].SourceBlendFactor, SourceBlendFactor);
1473       SET(BLEND_STATE, blend.rts[rt].DestinationBlendFactor, DestinationBlendFactor);
1474       SET(BLEND_STATE, blend.rts[rt].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
1475       SET(BLEND_STATE, blend.rts[rt].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
1476    }
1477    gfx->color_blend_zero = color_blend_zero;
1478    gfx->alpha_blend_zero = alpha_blend_zero;
1479 
1480    SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);
1481 
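   /* If no render target maps to attachment 0, fall back to entry 0 so
    * 3DSTATE_PS_BLEND still reads a valid BLEND_STATE_ENTRY.
    */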
1482    if (rt_0 == MESA_VK_ATTACHMENT_UNUSED)
1483       rt_0 = 0;
1484 
1485    /* Program 3DSTATE_PS_BLEND to be consistent with the BLEND_STATE_ENTRY
1486     * selected above (rt_0).
1487     */
1488    SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
1489    SET(PS_BLEND, ps_blend.ColorBufferBlendEnable,
1490                  GET(blend.rts[rt_0].ColorBufferBlendEnable));
1491    SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor,
1492                  GET(blend.rts[rt_0].SourceAlphaBlendFactor));
1493    SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor,
1494                  gfx->alpha_blend_zero ?
1495                  BLENDFACTOR_CONST_ALPHA :
1496                  GET(blend.rts[rt_0].DestinationAlphaBlendFactor));
1497    SET(PS_BLEND, ps_blend.SourceBlendFactor,
1498                  GET(blend.rts[rt_0].SourceBlendFactor));
1499    SET(PS_BLEND, ps_blend.DestinationBlendFactor,
1500                  gfx->color_blend_zero ?
1501                  BLENDFACTOR_CONST_COLOR :
1502                  GET(blend.rts[rt_0].DestinationBlendFactor));
1503    SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
1504    SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable,
1505                  GET(blend.IndependentAlphaBlendEnable));
1506    SET(PS_BLEND, ps_blend.AlphaToCoverageEnable,
1507                  dyn->ms.alpha_to_coverage_enable);
1508 }
1509 
1510 ALWAYS_INLINE static void
1511 update_blend_constants(struct anv_gfx_dynamic_state *hw_state,
1512                        const struct vk_dynamic_graphics_state *dyn,
1513                        const struct anv_cmd_graphics_state *gfx)
1514 {
1515    SET(CC_STATE, cc.BlendConstantColorRed,
1516                  gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]);
1517    SET(CC_STATE, cc.BlendConstantColorGreen,
1518                  gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]);
1519    SET(CC_STATE, cc.BlendConstantColorBlue,
1520                  gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]);
1521    SET(CC_STATE, cc.BlendConstantColorAlpha,
1522                  gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]);
1523 }
1524 
1525 ALWAYS_INLINE static void
1526 update_viewports(struct anv_gfx_dynamic_state *hw_state,
1527                  const struct vk_dynamic_graphics_state *dyn,
1528                  const struct anv_cmd_graphics_state *gfx,
1529                  const struct anv_device *device)
1530 {
1531    const struct anv_instance *instance = device->physical->instance;
1532    const VkViewport *viewports = dyn->vp.viewports;
1533 
1534    const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
1535 
1536       for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
1537          const VkViewport *vp = &viewports[i];
1538 
1539          /* The gfx7 state struct has just the matrix and guardband fields, the
1540           * gfx8 struct adds the min/max viewport fields. */
1541          struct GENX(SF_CLIP_VIEWPORT) sfv = {
1542             .ViewportMatrixElementm00 = vp->width / 2,
1543             .ViewportMatrixElementm11 = vp->height / 2,
1544             .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
1545             .ViewportMatrixElementm30 = vp->x + vp->width / 2,
1546             .ViewportMatrixElementm31 = vp->y + vp->height / 2,
1547             .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
1548                (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
1549             .XMinClipGuardband = -1.0f,
1550             .XMaxClipGuardband = 1.0f,
1551             .YMinClipGuardband = -1.0f,
1552             .YMaxClipGuardband = 1.0f,
1553             .XMinViewPort = vp->x,
1554             .XMaxViewPort = vp->x + vp->width - 1,
1555             .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
1556             .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
1557          };
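         /* Example: vp = { .x=0, .y=0, .width=1920, .height=1080,
          * .minDepth=0, .maxDepth=1 } with scale == 1.0f yields m00=960,
          * m11=540, m22=1, m30=960, m31=540, m32=0 and a viewport rect of
          * [0,1919]x[0,1079]. The MIN2/MAX2 on YMin/YMaxViewPort handle
          * negative-height (Y-flipped) viewports.
          */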
1558 
1559          /* Fix depth test misrenderings by lowering translated depth range */
1560          if (instance->lower_depth_range_rate != 1.0f)
1561             sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
1562 
1563          const uint32_t fb_size_max = 1 << 14;
1564          uint32_t x_min = 0, x_max = fb_size_max;
1565          uint32_t y_min = 0, y_max = fb_size_max;
1566 
1567          /* If we have a valid renderArea, include that */
1568          if (gfx->render_area.extent.width > 0 &&
1569              gfx->render_area.extent.height > 0) {
1570             x_min = MAX2(x_min, gfx->render_area.offset.x);
1571             x_max = MIN2(x_max, gfx->render_area.offset.x +
1572                                 gfx->render_area.extent.width);
1573             y_min = MAX2(y_min, gfx->render_area.offset.y);
1574             y_max = MIN2(y_max, gfx->render_area.offset.y +
1575                                 gfx->render_area.extent.height);
1576          }
1577 
1578          /* The client is required to have enough scissors for whatever it
1579           * sets as ViewportIndex but it's possible that they've got more
1580           * viewports set from a previous command. Also, from the Vulkan
1581           * 1.3.207 spec:
1582           *
1583           *    "The application must ensure (using scissor if necessary) that
1584           *    all rendering is contained within the render area."
1585           *
1586           * If the client doesn't set a scissor, that basically means it
1587           * guarantees everything is in-bounds already. If we end up using a
1588           * guardband of [-1, 1] in that case, there shouldn't be much loss.
1589           * It's theoretically possible that they could do all their clipping
1590           * with clip planes but that'd be a bit odd.
1591           */
1592          if (i < dyn->vp.scissor_count) {
1593             const VkRect2D *scissor = &dyn->vp.scissors[i];
1594             x_min = MAX2(x_min, scissor->offset.x);
1595             x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
1596             y_min = MAX2(y_min, scissor->offset.y);
1597             y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
1598          }
1599 
1600          /* Only bother calculating the guardband if our known render area is
1601           * less than the maximum size. Otherwise, it will calculate [-1, 1]
1602           * anyway but possibly with precision loss.
1603           */
1604          if (x_min > 0 || x_max < fb_size_max ||
1605              y_min > 0 || y_max < fb_size_max) {
1606             intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
1607                                            sfv.ViewportMatrixElementm00,
1608                                            sfv.ViewportMatrixElementm11,
1609                                            sfv.ViewportMatrixElementm30,
1610                                            sfv.ViewportMatrixElementm31,
1611                                            &sfv.XMinClipGuardband,
1612                                            &sfv.XMaxClipGuardband,
1613                                            &sfv.YMinClipGuardband,
1614                                            &sfv.YMaxClipGuardband);
1615          }
1616 
1617 #define SET_VP(bit, state, field)                                        \
1618          do {                                                           \
1619             if (hw_state->state.field != sfv.field) {                   \
1620                hw_state->state.field = sfv.field;                       \
1621                BITSET_SET(hw_state->dirty,                              \
1622                           ANV_GFX_STATE_##bit);                         \
1623             }                                                           \
1624          } while (0)
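         /* Like SET(), but compares each field against the locally computed
          * sfv and flags the SF_CLIP viewport state dirty on any change.
          */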
1625          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
1626          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
1627          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
1628          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
1629          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
1630          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
1631          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
1632          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
1633          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
1634          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
1635          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
1636          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
1637          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
1638          SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
1639 #undef SET_VP
1640 
1641          const bool depth_range_unrestricted =
1642             device->vk.enabled_extensions.EXT_depth_range_unrestricted;
1643 
1644          float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0;
1645          float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0;
1646 
1647          float min_depth = dyn->rs.depth_clamp_enable ?
1648                            MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit;
1649          float max_depth = dyn->rs.depth_clamp_enable ?
1650                            MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit;
1651 
1652          if (dyn->rs.depth_clamp_enable &&
1653             dyn->vp.depth_clamp_mode == VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT) {
1654             min_depth = dyn->vp.depth_clamp_range.minDepthClamp;
1655             max_depth = dyn->vp.depth_clamp_range.maxDepthClamp;
1656          }
1657 
1658          SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
1659          SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
1660       }
1661 
1662       /* If the HW state is already considered dirty or the previously
1663        * programmed viewport count is smaller than what we need, update the
1664        * viewport count and ensure the HW state is dirty. Otherwise, if the
1665        * number of viewports programmed previously was larger than what we
1666        * need now, there is no need to reemit; we can keep the old values.
1667        */
1668       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
1669           hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
1670          hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
1671          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
1672       }
1673       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
1674           hw_state->vp_cc.count < dyn->vp.viewport_count) {
1675          hw_state->vp_cc.count = dyn->vp.viewport_count;
1676          BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
1677       }
1678 }
1679 
1680 ALWAYS_INLINE static void
1681 update_scissors(struct anv_gfx_dynamic_state *hw_state,
1682                 const struct vk_dynamic_graphics_state *dyn,
1683                 const struct anv_cmd_graphics_state *gfx,
1684                 VkCommandBufferLevel cmd_buffer_level)
1685 {
1686    const VkRect2D *scissors = dyn->vp.scissors;
1687    const VkViewport *viewports = dyn->vp.viewports;
1688 
1689    for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
1690       const VkRect2D *s = &scissors[i];
1691       const VkViewport *vp = &viewports[i];
1692 
1693       const int max = 0xffff;
1694 
1695       uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
1696       uint32_t x_min = MAX2(s->offset.x, vp->x);
1697       int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
1698                            MAX2(vp->y, vp->y + vp->height) - 1);
1699       int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
1700                            vp->x + vp->width - 1);
1701 
1702       y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
1703       x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
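      /* INT16_MAX >> 1 == 16383 == (1 << 14) - 1, the largest valid
       * framebuffer coordinate (see fb_size_max in update_viewports()).
       */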
1704 
1705       /* Do this math using int64_t so overflow gets clamped correctly. */
1706       if (cmd_buffer_level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1707          y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
1708          x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
1709          y_max = CLAMP((uint64_t) y_max, 0,
1710                        gfx->render_area.offset.y +
1711                        gfx->render_area.extent.height - 1);
1712          x_max = CLAMP((uint64_t) x_max, 0,
1713                        gfx->render_area.offset.x +
1714                        gfx->render_area.extent.width - 1);
1715       }
1716 
1717       if (s->extent.width <= 0 || s->extent.height <= 0) {
1718          /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
1719           * ymax < ymin for empty clips. In case clip x, y, width height are
1720           * all 0, the clamps below produce 0 for xmin, ymin, xmax, ymax,
1721           * which isn't what we want. Just special case empty clips and
1722           * produce a canonical empty clip.
1723           */
1724          SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
1725          SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
1726          SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
1727          SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
1728       } else {
1729          SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
1730          SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
1731          SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
1732          SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
1733       }
1734    }
1735 
1736    /* If the HW state is already considered dirty or the previously
1737     * programmed scissor count is smaller than what we need, update the
1738     * scissor count and ensure the HW state is dirty. Otherwise, if the
1739     * number of scissors programmed previously was larger than what we
1740     * need now, there is no need to reemit; we can keep the old values.
1741     */
1742    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
1743        hw_state->scissor.count < dyn->vp.scissor_count) {
1744       hw_state->scissor.count = dyn->vp.scissor_count;
1745       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
1746    }
1747 }
1748 
1749 #if GFX_VERx10 == 125
1750 ALWAYS_INLINE static void
1751 update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
1752                   const struct anv_device *device,
1753                   const struct anv_cmd_graphics_state *gfx,
1754                   const struct intel_l3_config *l3_config)
1755 {
1756    unsigned fb_width, fb_height, tile_width, tile_height;
1757 
1758    if (device->physical->instance->enable_tbimr &&
1759        calculate_render_area(gfx, &fb_width, &fb_height) &&
1760        calculate_tile_dimensions(device, gfx, l3_config,
1761                                  fb_width, fb_height,
1762                                  &tile_width, &tile_height)) {
1763       /* Use a batch size of 128 polygons per slice as recommended
1764        * by BSpec 68436 "TBIMR Programming". */
1765       const unsigned num_slices = device->info->num_slices;
1766       const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256;
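      /* E.g. two slices give batch_size = 256, so the field below is
       * programmed as util_logbase2(256) - 5 = 3.
       */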
1767 
1768       SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height);
1769       SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width);
1770       SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount,
1771           DIV_ROUND_UP(fb_height, tile_height));
1772       SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount,
1773           DIV_ROUND_UP(fb_width, tile_width));
1774       SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize,
1775           util_logbase2(batch_size) - 5);
1776       SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true);
1777       SET(TBIMR_TILE_PASS_INFO, use_tbimr, true);
1778    } else {
1779       hw_state->use_tbimr = false;
1780    }
1781 }
1782 #endif
1783 
1784 /**
1785  * This function takes the vulkan runtime values & dirty states and updates
1786  * the values in anv_gfx_dynamic_state, flagging HW instructions for
1787  * reemission if the values are changing.
1788  *
1789  * Nothing is emitted in the batch buffer.
1790  */
1791 static void
1792 cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
1793                                    const struct anv_device *device,
1794                                    const struct vk_dynamic_graphics_state *dyn,
1795                                    struct anv_cmd_graphics_state *gfx,
1796                                    const struct anv_graphics_pipeline *pipeline,
1797                                    VkCommandBufferLevel cmd_buffer_level)
1798 {
1799    UNUSED bool fs_msaa_changed = false;
1800    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1801        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
1802        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
1803        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
1804       update_fs_msaa_flags(hw_state, dyn, pipeline);
1805 
1806    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1807        BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
1808       update_ps(hw_state, device, dyn, pipeline);
1809       update_ps_extra_wm(hw_state, pipeline);
1810    }
1811 
1812    if (gfx->dirty &
1813 #if GFX_VERx10 >= 125
1814        ANV_CMD_DIRTY_PIPELINE
1815 #else
1816        (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)
1817 #endif
1818       )
1819       update_ps_extra_has_uav(hw_state, gfx, pipeline);
1820 
1821    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1822        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE))
1823       update_ps_extra_kills_pixel(hw_state, dyn, gfx, pipeline);
1824 
1825    if ((gfx->dirty & ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) ||
1826        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
1827        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
1828       update_streamout(hw_state, dyn, gfx, pipeline);
1829 
1830    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
1831       update_provoking_vertex(hw_state, dyn, pipeline);
1832 
1833    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1834        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY))
1835       update_topology(hw_state, dyn, pipeline);
1836 
1837    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1838        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
1839        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
1840        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
1841       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
1842 
1843 #if GFX_VER >= 11
1844    if (device->vk.enabled_extensions.KHR_fragment_shading_rate &&
1845        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
1846       update_cps(hw_state, device, dyn, pipeline);
1847 #endif /* GFX_VER >= 11 */
1848 
1849    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1850        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
1851       update_te(hw_state, dyn, pipeline);
1852 
1853    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
1854       update_line_width(hw_state, dyn);
1855 
1856    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS))
1857       update_sf_global_depth_bias(hw_state, dyn);
1858 
1859    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
1860       update_clip_api_mode(hw_state, dyn);
1861 
1862    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
1863       update_clip_max_viewport(hw_state, dyn);
1864 
1865    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1866        (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1867        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
1868        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
1869        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
1870        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
1871        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
1872        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
1873        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
1874        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
1875        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
1876        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
1877        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE))
1878       update_clip_raster(hw_state, dyn, gfx, pipeline);
1879 
1880    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES))
1881       update_multisample(hw_state, dyn);
1882 
1883    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK))
1884       update_sample_mask(hw_state, dyn);
1885 
1886    if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1887 #if GFX_VER == 9
1888        /* For the PMA fix */
1889        (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1890 #endif
1891        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
1892        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
1893        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
1894        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
1895        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
1896        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
1897        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
1898        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE))
1899       update_wm_depth_stencil(hw_state, dyn, gfx, device);
1900 
1901 #if GFX_VER >= 12
1902    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
1903        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS))
1904       update_depth_bounds(hw_state, dyn);
1905 #endif
1906 
1907    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
1908        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
1909       update_line_stipple(hw_state, dyn);
1910 
1911    if ((gfx->dirty & ANV_CMD_DIRTY_RESTART_INDEX) ||
1912        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
1913       update_vf_restart(hw_state, dyn, gfx);
1914 
1915    if (gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER)
1916       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
1917 
1918 #if GFX_VERx10 >= 125
1919    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
1920       update_vfg_list_cut_index(hw_state, dyn);
1921 #endif
1922 
1923    if (device->vk.enabled_extensions.EXT_sample_locations &&
1924        (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
1925         BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
1926       BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
1927 
1928    if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1929        (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1930        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
1931        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
1932        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
1933        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
1934        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
1935        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
1936        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
1937        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
1938       const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
1939       update_blend_state(hw_state, dyn, gfx, device,
1940                          wm_prog_data != NULL,
1941                          wm_prog_data != NULL ?
1942                          wm_prog_data->dual_src_blend : false);
1943    }
1944 
1945    if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
1946       update_blend_constants(hw_state, dyn, gfx);
1947 
1948    if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1949        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
1950        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1951        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
1952        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
1953        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLAMP_RANGE))
1954       update_viewports(hw_state, dyn, gfx, device);
1955 
1956    if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1957        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1958        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS))
1959       update_scissors(hw_state, dyn, gfx, cmd_buffer_level);
1960 
1961 #if GFX_VERx10 == 125
1962    if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS))
1963       update_tbimr_info(hw_state, device, gfx, pipeline->base.base.l3_config);
1964 #endif
1965 
1966 #if INTEL_WA_14018283232_GFX_VER
1967    if (intel_needs_workaround(device->info, 14018283232) &&
1968        ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1969         BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))) {
1970       const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
1971       SET(WA_14018283232, wa_14018283232_toggle,
1972           dyn->ds.depth.bounds_test.enable &&
1973           wm_prog_data &&
1974           wm_prog_data->uses_kill);
1975    }
1976 #endif
1977 
1978    /* If the pipeline uses a dynamic value of patch_control_points and either
1979     * the pipeline change or the dynamic value change, check the value and
1980     * reemit if needed.
1981     */
1982    if (pipeline->dynamic_patch_control_points &&
1983        ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1984         BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
1985       SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
1986 }
1987 
1988 #undef GET
1989 #undef SET
1990 #undef SET_STAGE
1991 #undef SETUP_PROVOKING_VERTEX
1992 
1993 /**
1994  * This function takes the vulkan runtime values & dirty states and updates
1995  * the values in anv_gfx_dynamic_state, flagging HW instructions for
1996  * reemission if the values are changing.
1997  *
1998  * Nothing is emitted in the batch buffer.
1999  */
2000 void
2001 genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
2002 {
2003    cmd_buffer_flush_gfx_runtime_state(
2004       &cmd_buffer->state.gfx.dyn_state,
2005       cmd_buffer->device,
2006       &cmd_buffer->vk.dynamic_graphics_state,
2007       &cmd_buffer->state.gfx,
2008       anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
2009       cmd_buffer->vk.level);
2010 
2011    vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
2012 }
2013 
2014 static void
2015 emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
2016 {
2017    /* For Wa_16012775297, ensure VF_STATISTICS is emitted before 3DSTATE_VF
2018     */
2019    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
2020 #if GFX_VERx10 >= 125
2021    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
2022       vfg.DistributionMode = RR_STRICT;
2023    }
2024    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
2025       vf.GeometryDistributionEnable = true;
2026    }
2027 #endif
2028 
2029 #if GFX_VER >= 12
2030    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
2031       pr.ReplicaMask = 1;
2032    }
2033 #endif
2034 
2035    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) {
2036       rr.CullMode = CULLMODE_NONE;
2037       rr.FrontFaceFillMode = FILL_MODE_SOLID;
2038       rr.BackFaceFillMode = FILL_MODE_SOLID;
2039    }
2040 
2041    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);
2042 
2043 #if GFX_VER >= 11
2044    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero);
2045 #endif
2046 
2047    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) {
2048       clip.ClipEnable = true;
2049       clip.ClipMode = CLIPMODE_REJECT_ALL;
2050    }
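   /* CLIPMODE_REJECT_ALL makes the clipper discard every primitive, so the
    * dummy draws below exercise the pipeline without writing any pixels.
    */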
2051 
2052    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero);
2053    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero);
2054    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero);
2055    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero);
2056    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero);
2057    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero);
2058 
2059    uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2,
2060                                                GENX(3DSTATE_VERTEX_ELEMENTS));
2061    uint32_t *ve_pack_dest = &vertex_elements[1];
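   /* 1 + 2 * 2 = one header DWord plus two VERTEX_ELEMENT_STATE entries of
    * two DWords each; ve_pack_dest skips the header DWord.
    */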
2062 
2063    for (int i = 0; i < 2; i++) {
2064       struct GENX(VERTEX_ELEMENT_STATE) element = {
2065          .Valid = true,
2066          .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
2067          .Component0Control = VFCOMP_STORE_0,
2068          .Component1Control = VFCOMP_STORE_0,
2069          .Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
2070          .Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
2071       };
2072       GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element);
2073       ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
2074    }
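   /* Element 0 stores a constant (0, 0, 0, 0) and element 1 stores
    * (0, 0, 1, 1), giving the dummy draw well-defined vertex inputs.
    */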
2075 
2076    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
2077       topo.PrimitiveTopologyType = _3DPRIM_TRILIST;
2078    }
2079 
2080    /* Emit dummy draw per slice. */
2081    for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) {
2082       anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
2083          prim.VertexCountPerInstance = 3;
2084          prim.PrimitiveTopologyType = _3DPRIM_TRILIST;
2085          prim.InstanceCount = 1;
2086          prim.VertexAccessType = SEQUENTIAL;
2087       }
2088    }
2089 }
2090 
2091 #if INTEL_WA_14018283232_GFX_VER
2092 void
2093 genX(batch_emit_wa_14018283232)(struct anv_batch *batch)
2094 {
2095    anv_batch_emit(batch, GENX(RESOURCE_BARRIER), barrier) {
2096       barrier.ResourceBarrierBody = (struct GENX(RESOURCE_BARRIER_BODY)) {
2097          .BarrierType = RESOURCE_BARRIER_TYPE_IMMEDIATE,
2098          .SignalStage = RESOURCE_BARRIER_STAGE_COLOR,
2099          .WaitStage = RESOURCE_BARRIER_STAGE_PIXEL,
2100       };
2101    }
2102 }
2103 #endif
2104 
2105 /**
2106  * This function handles dirty state emission to the batch buffer.
2107  */
2108 static void
2109 cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
2110 {
2111    struct anv_device *device = cmd_buffer->device;
2112    struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
2113    struct anv_graphics_pipeline *pipeline =
2114       anv_pipeline_to_graphics(gfx->base.pipeline);
2115    const struct vk_dynamic_graphics_state *dyn =
2116       &cmd_buffer->vk.dynamic_graphics_state;
2117    struct anv_push_constants *push_consts =
2118       &cmd_buffer->state.gfx.base.push_constants;
2119    struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
2120    const bool protected = cmd_buffer->vk.pool->flags &
2121                           VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
2122 
2123 #if INTEL_WA_16011107343_GFX_VER
2124    /* Will be emitted in front of every draw instead */
2125    if (intel_needs_workaround(device->info, 16011107343) &&
2126        anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
2127       BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
2128 #endif
2129 
2130 #if INTEL_WA_22018402687_GFX_VER
2131    /* Will be emitted in front of every draw instead */
2132    if (intel_needs_workaround(device->info, 22018402687) &&
2133        anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2134       BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
2135 #endif
2136 
2137    /*
2138     * Values provided by push constants
2139     */
2140 
2141    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) {
2142       push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
2143       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2144       gfx->base.push_constants_data_dirty = true;
2145    }
2146 
2147    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
2148       push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags;
2149       cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
2150       gfx->base.push_constants_data_dirty = true;
2151    }
2152 
2153    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
2154       genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);
2155 
2156       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
2157 
2158       memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
2159              sizeof(struct intel_urb_config));
2160    }
2161 
2162    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
2163       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);
2164 
2165    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
2166       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);
2167 
2168    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
2169       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);
2170 
2171 #if GFX_VER >= 11
2172    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
2173       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
2174 #endif
2175 
2176    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
2177       anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2178                                               final.vs, protected);
2179    }
2180 
2181    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) {
2182       anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2183                                               final.hs, protected);
2184    }
2185 
2186    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) {
2187       anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2188                                               final.ds, protected);
2189    }
2190 
2191    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) {
2192       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
2193          vfs.StatisticsEnable = true;
2194       }
2195    }
2196 
2197    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
2198       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);
2199 
2200    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
2201       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);
2202 
2203    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
2204       /* Wa_16011773973:
2205        * If SOL is enabled and SO_DECL state has to be programmed,
2206        *    1. Send 3D State SOL state with SOL disabled
2207        *    2. Send SO_DECL NP state
2208        *    3. Send 3D State SOL with SOL Enabled
2209        */
2210       if (intel_needs_workaround(device->info, 16011773973) &&
2211           pipeline->uses_xfb)
2212          anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);
2213 
2214       anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
2215                                     final.so_decl_list);
2216 
2217 #if GFX_VER >= 11 && GFX_VER < 20
2218       /* ICL PRMs, Volume 2a - Command Reference: Instructions,
2219        * 3DSTATE_SO_DECL_LIST:
2220        *
2221        *    "Workaround: This command must be followed by a PIPE_CONTROL with
2222        *     CS Stall bit set."
2223        *
2224        * On DG2+ also known as Wa_1509820217.
2225        */
2226       genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2227                                    cmd_buffer->state.current_pipeline,
2228                                    ANV_PIPE_CS_STALL_BIT);
2229 #endif
2230    }
2231 
2232    if (device->vk.enabled_extensions.EXT_mesh_shader) {
2233       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) {
2234          anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2235                                                  final.mesh_control, protected);
2236       }
2237 
2238       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
2239          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);
2240 
2241       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
2242          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);
2243 
2244       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) {
2245          anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2246                                                  final.task_control, protected);
2247       }
2248 
2249       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
2250          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);
2251 
2252       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
2253          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);
2254 
2255       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
2256          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);
2257 
2258       if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
2259          anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
2260    } else {
2261       assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
2262              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
2263              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
2264              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
2265              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
2266              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
2267              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
2268              !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
2269    }
2270 
2271 #define INIT(category, name) \
2272    .name = hw_state->category.name
2273 #define SET(s, category, name) \
2274    s.name = hw_state->category.name
2275 
2276    /* Now the potentially dynamic instructions */
2277 
2278    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
2279       anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS),
2280                                      pipeline, partial.ps, ps, protected) {
2281          SET(ps, ps, KernelStartPointer0);
2282          SET(ps, ps, KernelStartPointer1);
2283          SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
2284          SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);
2285 
#if GFX_VER < 20
         SET(ps, ps, KernelStartPointer2);
         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);

         SET(ps, ps, _8PixelDispatchEnable);
         SET(ps, ps, _16PixelDispatchEnable);
         SET(ps, ps, _32PixelDispatchEnable);
#else
         SET(ps, ps, Kernel0Enable);
         SET(ps, ps, Kernel1Enable);
         SET(ps, ps, Kernel0SIMDWidth);
         SET(ps, ps, Kernel1SIMDWidth);
         SET(ps, ps, Kernel0PolyPackingPolicy);
         SET(ps, ps, Kernel0MaximumPolysperThread);
#endif
         SET(ps, ps, PositionXYOffsetSelect);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) ||
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
                           pipeline, partial.ps_extra, pse) {
         SET(pse, ps_extra, PixelShaderHasUAV);
         SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
         SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
#endif
         SET(pse, ps_extra, PixelShaderKillsPixel);

#if INTEL_WA_18038825448_GFX_VER
         /* Add a dependency if either the shader needs it (because of a
          * runtime change through a pre-rasterization shader) or if we
          * notice a change.
          */
         pse.EnablePSDependencyOnCPsizeChange =
            hw_state->ps_extra.EnablePSDependencyOnCPsizeChange ||
            BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE);
#elif GFX_VERx10 >= 125
         SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange);
#endif
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
                           pipeline, partial.clip, clip) {
         SET(clip, clip, APIMode);
         SET(clip, clip, ViewportXYClipTestEnable);
         SET(clip, clip, TriangleStripListProvokingVertexSelect);
         SET(clip, clip, LineStripListProvokingVertexSelect);
         SET(clip, clip, TriangleFanProvokingVertexSelect);
         SET(clip, clip, MaximumVPIndex);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
      genX(streamout_prologue)(cmd_buffer);

      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
                           pipeline, partial.so, so) {
         SET(so, so, RenderingDisable);
         SET(so, so, RenderStreamSelect);
         SET(so, so, ReorderMode);
         SET(so, so, ForceRendering);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
      struct anv_state sf_clip_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->vp_sf_clip.count * 64, 64);

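      /* Each SF_CLIP_VIEWPORT entry packs to 64 bytes (16 dwords), hence
       * the i * 64 stride below and the 64-byte allocation above.
       */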
      for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
         struct GENX(SF_CLIP_VIEWPORT) sfv = {
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
            INIT(vp_sf_clip.elem[i], XMinClipGuardband),
            INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
            INIT(vp_sf_clip.elem[i], YMinClipGuardband),
            INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
            INIT(vp_sf_clip.elem[i], XMinViewPort),
            INIT(vp_sf_clip.elem[i], XMaxViewPort),
            INIT(vp_sf_clip.elem[i], YMinViewPort),
            INIT(vp_sf_clip.elem[i], YMaxViewPort),
         };
         GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
      }

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
         clip.SFClipViewportPointer = sf_clip_state.offset;
      }
   }

   /* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming
    * 3DSTATE_VIEWPORT_STATE_POINTERS_CC:
    *    https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
       (GFX_VER == 9 &&
        BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) {
      hw_state->vp_cc.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->vp_cc.count * 8, 32);

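      /* CC_VIEWPORT entries are 8 bytes (2 dwords) each, hence the i * 8
       * stride below.
       */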
      for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
         struct GENX(CC_VIEWPORT) cc_viewport = {
            INIT(vp_cc.elem[i], MinimumDepth),
            INIT(vp_cc.elem[i], MaximumDepth),
         };
         GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8,
                                &cc_viewport);
      }

      /* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below
       */
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
         cc.CCViewportPointer = hw_state->vp_cc.state.offset;
      }
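      /* Record that a CC viewport has been programmed; Wa_18020335297
       * below only applies once this is the case.
       */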
      cmd_buffer->state.gfx.viewport_set = true;
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
      /* Wa_1409725701:
       *
       *    "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
       *    stored as an array of up to 16 elements. The location of first
       *    element of the array, as specified by Pointer to SCISSOR_RECT,
       *    should be aligned to a 64-byte boundary."
       */
      struct anv_state scissor_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->scissor.count * 8, 64);

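      /* SCISSOR_RECT entries are 8 bytes (2 dwords) each; per the
       * workaround above, only the base of the array needs the 64-byte
       * alignment.
       */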
      for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
         struct GENX(SCISSOR_RECT) scissor = {
            INIT(scissor.elem[i], ScissorRectangleYMin),
            INIT(scissor.elem[i], ScissorRectangleXMin),
            INIT(scissor.elem[i], ScissorRectangleYMax),
            INIT(scissor.elem[i], ScissorRectangleXMax),
         };
         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
      }

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
         ssp.ScissorRectPointer = scissor_state.offset;
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
         SET(vft, vft, PrimitiveTopologyType);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
      genX(batch_emit_vertex_input)(&cmd_buffer->batch, device,
                                    pipeline, dyn->vi);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
                           pipeline, partial.te, te) {
         SET(te, te, OutputTopology);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
      anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS),
                                     pipeline, partial.gs, gs, protected) {
         SET(gs, gs, ReorderMode);
      }
   }

#if GFX_VER >= 30
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) {
         coarse_pixel.DisableCPSPointers = true;
         SET(coarse_pixel, coarse_pixel, CPSizeX);
         SET(coarse_pixel, coarse_pixel, CPSizeY);
         SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode);
         SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode);
      }
   }
#else
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
#if GFX_VER == 11
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
         SET(cps, cps, CoarsePixelShadingMode);
         SET(cps, cps, MinCPSizeX);
         SET(cps, cps, MinCPSizeY);
      }
#elif GFX_VER >= 12
      /* TODO: we can optimize this flush in the following cases:
       *
       *    In the case where the last geometry shader emits a value that is
       *    not constant, we can avoid this stall because we can synchronize
       *    the pixel shader internally with
       *    3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
       *
       *    If we know that the previous pipeline and the current one are
       *    using the same fragment shading rate.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
         pc.PSSStallSyncEnable = true;
#else
         pc.PSDSyncEnable = true;
#endif
      }

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
         SET(cps, cps, CoarsePixelShadingStateArrayPointer);
      }
#endif
   }
#endif /* GFX_VER >= 30 */

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
                           pipeline, partial.sf, sf) {
         SET(sf, sf, LineWidth);
         SET(sf, sf, TriangleStripListProvokingVertexSelect);
         SET(sf, sf, LineStripListProvokingVertexSelect);
         SET(sf, sf, TriangleFanProvokingVertexSelect);
         SET(sf, sf, LegacyGlobalDepthBiasEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), raster) {
         /* For details on 3DSTATE_RASTER multisample state, see the BSpec
          * table "Multisample Modes State".
          *
          * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix
          * computations. If we ever set this bit to a different value, they
          * will need to be updated accordingly.
          */
         raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
         raster.ForceMultisampling = false;
         raster.ScissorRectangleEnable = true;

         SET(raster, raster, APIMode);
         SET(raster, raster, DXMultisampleRasterizationEnable);
         SET(raster, raster, AntialiasingEnable);
         SET(raster, raster, CullMode);
         SET(raster, raster, FrontWinding);
         SET(raster, raster, GlobalDepthOffsetEnableSolid);
         SET(raster, raster, GlobalDepthOffsetEnableWireframe);
         SET(raster, raster, GlobalDepthOffsetEnablePoint);
         SET(raster, raster, GlobalDepthOffsetConstant);
         SET(raster, raster, GlobalDepthOffsetScale);
         SET(raster, raster, GlobalDepthOffsetClamp);
         SET(raster, raster, FrontFaceFillMode);
         SET(raster, raster, BackFaceFillMode);
         SET(raster, raster, ViewportZFarClipTestEnable);
         SET(raster, raster, ViewportZNearClipTestEnable);
         SET(raster, raster, ConservativeRasterizationEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
         ms.PixelLocation              = CENTER;

         /* The PRM says that this bit is valid only for DX9:
          *
          *    SW can choose to set this bit only for DX9 API. DX10/OGL API's
          *    should not have any effect by setting or not setting this bit.
          */
         ms.PixelPositionOffsetEnable  = false;

         SET(ms, ms, NumberofMultisamples);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
      hw_state->cc.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         INIT(cc, BlendConstantColorRed),
         INIT(cc, BlendConstantColorGreen),
         INIT(cc, BlendConstantColorBlue),
         INIT(cc, BlendConstantColorAlpha),
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc);

      /* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below
       */
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = hw_state->cc.state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
         SET(sm, sm, SampleMask);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         SET(ds, ds, DoubleSidedStencilEnable);
         SET(ds, ds, StencilTestMask);
         SET(ds, ds, StencilWriteMask);
         SET(ds, ds, BackfaceStencilTestMask);
         SET(ds, ds, BackfaceStencilWriteMask);
         SET(ds, ds, StencilReferenceValue);
         SET(ds, ds, BackfaceStencilReferenceValue);
         SET(ds, ds, DepthTestEnable);
         SET(ds, ds, DepthBufferWriteEnable);
         SET(ds, ds, DepthTestFunction);
         SET(ds, ds, StencilTestEnable);
         SET(ds, ds, StencilBufferWriteEnable);
         SET(ds, ds, StencilFailOp);
         SET(ds, ds, StencilPassDepthPassOp);
         SET(ds, ds, StencilPassDepthFailOp);
         SET(ds, ds, StencilTestFunction);
         SET(ds, ds, BackfaceStencilFailOp);
         SET(ds, ds, BackfaceStencilPassDepthPassOp);
         SET(ds, ds, BackfaceStencilPassDepthFailOp);
         SET(ds, ds, BackfaceStencilTestFunction);
      }
   }

#if GFX_VER >= 12
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
         SET(db, db, DepthBoundsTestEnable);
         SET(db, db, DepthBoundsTestMinValue);
         SET(db, db, DepthBoundsTestMaxValue);
      }
   }
#endif

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         SET(ls, ls, LineStipplePattern);
         SET(ls, ls, LineStippleInverseRepeatCount);
         SET(ls, ls, LineStippleRepeatCount);
      }
#if GFX_VER >= 11
      /* ICL PRMs, Volume 2a - Command Reference: Instructions,
       * 3DSTATE_LINE_STIPPLE:
       *
       *    "Workaround: This command must be followed by a PIPE_CONTROL with
       *     CS Stall bit set."
       */
      genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_CS_STALL_BIT);
#endif
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
         vf.GeometryDistributionEnable = true;
#endif
         SET(vf, vf, IndexedDrawCutIndexEnable);
         SET(vf, vf, CutIndex);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
      struct anv_buffer *buffer = gfx->index_buffer;
      uint32_t offset = gfx->index_offset;
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
         ib.IndexFormat           = gfx->index_type;
         ib.MOCS                  = anv_mocs(device,
                                             buffer ? buffer->address.bo : NULL,
                                             ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
         ib.L3BypassDisable       = true;
#endif
         if (buffer) {
            ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
            ib.BufferSize            = gfx->index_size;
         }
      }
   }

#if GFX_VERx10 >= 125
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
                           pipeline, partial.vfg, vfg) {
         SET(vfg, vfg, ListCutIndexEnable);
      }
   }
#endif

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
      genX(emit_sample_pattern)(&cmd_buffer->batch,
                                dyn->ms.sample_locations_enable ?
                                dyn->ms.sample_locations : NULL);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
                           pipeline, partial.wm, wm) {
         SET(wm, wm, LineStippleEnable);
         SET(wm, wm, BarycentricInterpolationMode);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
         SET(blend, ps_blend, HasWriteableRT);
         SET(blend, ps_blend, ColorBufferBlendEnable);
         SET(blend, ps_blend, SourceAlphaBlendFactor);
         SET(blend, ps_blend, DestinationAlphaBlendFactor);
         SET(blend, ps_blend, SourceBlendFactor);
         SET(blend, ps_blend, DestinationBlendFactor);
         SET(blend, ps_blend, AlphaTestEnable);
         SET(blend, ps_blend, IndependentAlphaBlendEnable);
         SET(blend, ps_blend, AlphaToCoverageEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
      const uint32_t num_dwords = GENX(BLEND_STATE_length) +
         GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
      hw_state->blend.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            num_dwords * 4,
                                            64);

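      /* The packed buffer starts with the shared BLEND_STATE header
       * dwords, immediately followed by one BLEND_STATE_ENTRY per render
       * target.
       */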
      uint32_t *dws = hw_state->blend.state.map;

      struct GENX(BLEND_STATE) blend_state = {
         INIT(blend, AlphaToCoverageEnable),
         INIT(blend, AlphaToOneEnable),
         INIT(blend, IndependentAlphaBlendEnable),
         INIT(blend, ColorDitherEnable),
      };
      GENX(BLEND_STATE_pack)(NULL, dws, &blend_state);

      /* Jump to blend entries. */
      dws += GENX(BLEND_STATE_length);
      for (uint32_t i = 0; i < MAX_RTS; i++) {
         struct GENX(BLEND_STATE_ENTRY) entry = {
            INIT(blend.rts[i], WriteDisableAlpha),
            INIT(blend.rts[i], WriteDisableRed),
            INIT(blend.rts[i], WriteDisableGreen),
            INIT(blend.rts[i], WriteDisableBlue),
            INIT(blend.rts[i], LogicOpFunction),
            INIT(blend.rts[i], LogicOpEnable),
            INIT(blend.rts[i], ColorBufferBlendEnable),
            INIT(blend.rts[i], ColorClampRange),
            INIT(blend.rts[i], PreBlendColorClampEnable),
            INIT(blend.rts[i], PostBlendColorClampEnable),
            INIT(blend.rts[i], SourceBlendFactor),
            INIT(blend.rts[i], DestinationBlendFactor),
            INIT(blend.rts[i], ColorBlendFunction),
            INIT(blend.rts[i], SourceAlphaBlendFactor),
            INIT(blend.rts[i], DestinationAlphaBlendFactor),
            INIT(blend.rts[i], AlphaBlendFunction),
         };

         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
         dws += GENX(BLEND_STATE_ENTRY_length);
      }

      /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer      = hw_state->blend.state.offset;
         bsp.BlendStatePointerValid = true;
      }
   }

#if INTEL_WA_18019816803_GFX_VER
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
      genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_PSS_STALL_SYNC_BIT);
   }
#endif

#if INTEL_WA_14018283232_GFX_VER
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232))
      genX(batch_emit_wa_14018283232)(&cmd_buffer->batch);
#endif

#if GFX_VER == 9
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
#endif

#if GFX_VERx10 >= 125
   if (hw_state->use_tbimr &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO),
                     tbimr) {
         SET(tbimr, tbimr, TileRectangleHeight);
         SET(tbimr, tbimr, TileRectangleWidth);
         SET(tbimr, tbimr, VerticalTileCount);
         SET(tbimr, tbimr, HorizontalTileCount);
         SET(tbimr, tbimr, TBIMRBatchSize);
         SET(tbimr, tbimr, TileBoxCheck);
      }
   }
#endif

#undef INIT
#undef SET

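   /* Everything dirty has been emitted above; clear the set so the next
    * flush only handles state that changes again.
    */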
   BITSET_ZERO(hw_state->dirty);
}

/**
 * This function handles possible state workarounds and emits the dirty
 * instructions to the batch buffer.
 */
void
genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;

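   /* With the reemit debug option set in INTEL_DEBUG, mark all graphics
    * state dirty so everything is re-emitted on each flush (useful when
    * bisecting state-tracking issues).
    */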
   if (INTEL_DEBUG(DEBUG_REEMIT)) {
      BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty,
                device->gfx_dirty_state);
   }

   /**
    * Put potential workarounds here if you need to reemit an instruction
    * because another one changed.
    */

   /* Reproduce the programming done by the Windows driver. This fixes
    * flickering issues with multiple workloads.
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
   }

   /* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */
#if INTEL_WA_16012775297_GFX_VER
   if (intel_needs_workaround(device->info, 16012775297) &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
#endif

   /* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
    * it afterwards.
    */
   if (intel_needs_workaround(device->info, 16011773973) &&
       pipeline->uses_xfb &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
   }

#if INTEL_WA_18038825448_GFX_VER
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data) {
      genX(cmd_buffer_set_coarse_pixel_active)(
         cmd_buffer,
         brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags));
   }
#endif

   /* Gfx11 undocumented issue:
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781
    */
#if GFX_VER == 11
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
#endif

   /* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */
   if (intel_needs_workaround(device->info, 18020335297) &&
       (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
        BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) &&
       cmd_buffer->state.gfx.viewport_set) {
      /* For mesh, we implement the WA using CS stall. This is for
       * simplicity and takes care of possible interaction with Wa_16014390852.
       */
      if (anv_pipeline_is_mesh(pipeline)) {
         genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                      _3D, ANV_PIPE_CS_STALL_BIT);
      } else {
         /* Mask off all instructions that we program. */
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);

         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);

         cmd_buffer_gfx_state_emission(cmd_buffer);

         emit_wa_18020335297_dummy_draw(cmd_buffer);

         /* Dirty all the emitted WA state to make sure the current real
          * state is restored.
          */
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);

         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
      }
   }

   cmd_buffer_gfx_state_emission(cmd_buffer);
}

void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   if (!anv_cmd_buffer_is_render_queue(cmd_buffer))
      return;

   if (cmd_buffer->state.gfx.pma_fix_enabled == enable)
      return;

   cmd_buffer->state.gfx.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall.  However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   genx_batch_emit_pipe_control
      (&cmd_buffer->batch, cmd_buffer->device->info,
       cmd_buffer->state.current_pipeline,
       ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
       ANV_PIPE_CS_STALL_BIT |
#if GFX_VER >= 12
       ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
       ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);

#if GFX_VER == 9
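   /* CACHE_MODE_0 is a masked register: the upper half of the dword
    * selects which bits of the lower half the LRI write actually updates,
    * which is why the Mask field is set alongside the value.
    */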
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset   = GENX(CACHE_MODE_0_num);
      lri.DataDWord        = cache_mode;
   }
#endif /* GFX_VER == 9 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   genx_batch_emit_pipe_control
      (&cmd_buffer->batch, cmd_buffer->device->info,
       cmd_buffer->state.current_pipeline,
       ANV_PIPE_DEPTH_STALL_BIT |
       ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
#if GFX_VER >= 12
       ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
       ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
}