/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"
#include "common/intel_genX_state_brw.h"
#include "common/intel_guardband.h"
#include "common/intel_tiled_render.h"
#include "compiler/brw_prim.h"

static const uint32_t vk_to_intel_blend[] = {
   [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
   [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
   [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
   [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
   [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
   [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
   [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR] = BLENDFACTOR_INV_CONST_COLOR,
   [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA] = BLENDFACTOR_INV_CONST_ALPHA,
   [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
   [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
   [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
   [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
};

static const uint32_t vk_to_intel_blend_op[] = {
   [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
   [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
   [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
   [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
   [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
};

static const uint32_t vk_to_intel_cullmode[] = {
   [VK_CULL_MODE_NONE] = CULLMODE_NONE,
   [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
   [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
   [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
};

static const uint32_t vk_to_intel_fillmode[] = {
   [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
   [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
   [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
};

static const uint32_t vk_to_intel_front_face[] = {
   [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
   [VK_FRONT_FACE_CLOCKWISE] = 0
};

static const uint32_t vk_to_intel_logic_op[] = {
   [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
   [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
   [VK_LOGIC_OP_AND] = LOGICOP_AND,
   [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
   [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
   [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
   [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
   [VK_LOGIC_OP_OR] = LOGICOP_OR,
   [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
   [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
   [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
   [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
   [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
   [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
   [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
   [VK_LOGIC_OP_SET] = LOGICOP_SET,
};

static const uint32_t vk_to_intel_compare_op[] = {
   [VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER,
   [VK_COMPARE_OP_LESS] = PREFILTEROP_LESS,
   [VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL,
   [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL,
   [VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER,
   [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL,
   [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
   [VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS,
};

static const uint32_t vk_to_intel_stencil_op[] = {
   [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
   [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
   [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
   [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
   [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
   [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
   [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
   [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
};

static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};

static void
genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
{
#if INTEL_WA_16013994831_GFX_VER
   /* Wa_16013994831 - Disable preemption during streamout and re-enable it
    * if XFB is not used by the current pipeline.
    *
    * Although this workaround applies to Gfx12+, we already disable object
    * level preemption for another reason in genX_state.c so we can skip this
    * for Gfx12.
    */
   if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
      return;

   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   if (pipeline->uses_xfb) {
      genX(cmd_buffer_set_preemption)(cmd_buffer, false);
      return;
   }

   if (!cmd_buffer->state.gfx.object_preemption)
      genX(cmd_buffer_set_preemption)(cmd_buffer, true);
#endif
}

#if GFX_VER >= 12 && GFX_VER < 30
static uint32_t
get_cps_state_offset(const struct anv_device *device,
                     const struct vk_fragment_shading_rate_state *fsr)
{
   uint32_t offset;
   static const uint32_t size_index[] = {
      [1] = 0,
      [2] = 1,
      [4] = 2,
   };

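   /* The device keeps a pre-packed array of CPS_STATE structures
    * (device->cps_states) with one entry per supported combination of
    * combiner ops and fragment size, each entry replicated MAX_VIEWPORTS
    * times. Entry 0 is the "disabled" state, hence the "1 +" below; the
    * remaining dimensions are indexed as [combiner0][combiner1][width][height]
    * on Gfx12.5+ and just [width][height] before that.
    */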
#if GFX_VERx10 >= 125
   offset =
      1 + /* skip disabled */
      fsr->combiner_ops[0] * 5 * 3 * 3 +
      fsr->combiner_ops[1] * 3 * 3 +
      size_index[fsr->fragment_size.width] * 3 +
      size_index[fsr->fragment_size.height];
#else
   offset =
      1 + /* skip disabled */
      size_index[fsr->fragment_size.width] * 3 +
      size_index[fsr->fragment_size.height];
#endif

   offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;

   return device->cps_states.offset + offset;
}
#endif /* GFX_VER >= 12 && GFX_VER < 30 */

#if GFX_VER >= 30
static uint32_t
get_cps_size(uint32_t size)
{
   switch (size) {
   case 1:
      return CPSIZE_1;
   case 2:
      return CPSIZE_2;
   case 4:
      return CPSIZE_4;
   default:
      unreachable("Invalid size");
   }
}

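/* Note the HW names the min/max combiners after their effect on quality:
 * taking the smaller fragment size is "high quality", taking the larger one
 * is "low quality".
 */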
static const uint32_t vk_to_intel_shading_rate_combiner_op[] = {
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = CPS_COMB_OP_PASSTHROUGH,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = CPS_COMB_OP_OVERRIDE,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = CPS_COMB_OP_HIGH_QUALITY,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = CPS_COMB_OP_LOW_QUALITY,
   [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = CPS_COMB_OP_RELATIVE,
};
#endif

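/* True when the fragment shader may read the depth/stencil attachments it is
 * also testing/writing against, either through an explicit feedback loop or
 * through a dynamic-rendering input attachment.
 */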
static bool
has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
{
   return (dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
                                  VK_IMAGE_ASPECT_STENCIL_BIT)) ||
          dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED ||
          dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED;
}

UNUSED static bool
want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
                     const struct anv_cmd_graphics_state *gfx,
                     const struct vk_depth_stencil_state *ds)
{
   if (GFX_VER > 9)
      return false;
   assert(GFX_VER == 9);

   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
    *
    *    Clearing this bit will force the STC cache to wait for pending
    *    retirement of pixels at the HZ-read stage and do the STC-test for
    *    Non-promoted, R-computed and Computed depth modes instead of
    *    postponing the STC-test to RCPFE.
    *
    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    *
    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    *
    *    COMP_STC_EN = STC_TEST_EN &&
    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
    *
    *    SW parses the pipeline states to generate the following logical
    *    signal indicating if PMA FIX can be enabled.
    *
    *    STC_PMA_OPT =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *       (COMP_STC_EN || STC_WRITE_EN) &&
    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *         3DSTATE_WM::ForceKillPix == ON ||
    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!gfx->hiz_enabled)
      return false;

   /* We can't possibly know if HiZ is enabled without the depth attachment */
   ASSERTED const struct anv_image_view *d_iview = gfx->depth_att.iview;
   assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(gfx->base.pipeline);
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == 2) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    */
   const bool stc_test_en = ds->stencil.test_enable;

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    */
   const bool stc_write_en = ds->stencil.write_enable;

   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;

   /* COMP_STC_EN || STC_WRITE_EN */
   if (!(comp_stc_en || stc_write_en))
      return false;

   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_WM::ForceKillPix == ON ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
    */
   return pipeline->kill_pixel ||
          pipeline->rp_has_ds_self_dep ||
          has_ds_feedback_loop(dyn) ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}

static inline bool
anv_rasterization_aa_mode(VkPolygonMode raster_mode,
                          VkLineRasterizationModeKHR line_mode)
{
   if (raster_mode == VK_POLYGON_MODE_LINE &&
       line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
      return true;
   return false;
}

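/* Resolve VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR to the mode the HW will
 * actually be programmed for: rectangular lines when multisampling,
 * Bresenham lines otherwise.
 */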
static inline VkLineRasterizationModeKHR
anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
                            unsigned rasterization_samples)
{
   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
      if (rasterization_samples > 1) {
         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
      } else {
         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
      }
   }
   return line_mode;
}

/** Returns the final polygon mode for rasterization
 *
 * This function takes into account polygon mode, primitive topology and the
 * different shader stages which might generate their own type of primitives.
 */
static inline VkPolygonMode
anv_raster_polygon_mode(const struct anv_graphics_pipeline *pipeline,
                        VkPolygonMode polygon_mode,
                        VkPrimitiveTopology primitive_topology)
{
   if (anv_pipeline_is_mesh(pipeline)) {
      switch (get_mesh_prog_data(pipeline)->primitive_type) {
      case MESA_PRIM_POINTS:
         return VK_POLYGON_MODE_POINT;
      case MESA_PRIM_LINES:
         return VK_POLYGON_MODE_LINE;
      case MESA_PRIM_TRIANGLES:
         return polygon_mode;
      default:
         unreachable("invalid primitive type for mesh");
      }
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
      switch (get_gs_prog_data(pipeline)->output_topology) {
      case _3DPRIM_POINTLIST:
         return VK_POLYGON_MODE_POINT;

      case _3DPRIM_LINELIST:
      case _3DPRIM_LINESTRIP:
      case _3DPRIM_LINELOOP:
         return VK_POLYGON_MODE_LINE;

      case _3DPRIM_TRILIST:
      case _3DPRIM_TRIFAN:
      case _3DPRIM_TRISTRIP:
      case _3DPRIM_RECTLIST:
      case _3DPRIM_QUADLIST:
      case _3DPRIM_QUADSTRIP:
      case _3DPRIM_POLYGON:
         return polygon_mode;
      }
      unreachable("Unsupported GS output topology");
   } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      switch (get_tes_prog_data(pipeline)->output_topology) {
      case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
         return VK_POLYGON_MODE_POINT;

      case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
         return VK_POLYGON_MODE_LINE;

      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
      case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
         return polygon_mode;
      }
429 unreachable("Unsupported TCS output topology");
   } else {
      switch (primitive_topology) {
      case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
         return VK_POLYGON_MODE_POINT;

      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
         return VK_POLYGON_MODE_LINE;

      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
      case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
         return polygon_mode;

      default:
         unreachable("Unsupported primitive topology");
      }
   }
}

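/* Dual-source blending consumes a second fragment shader color output
 * (SRC1). The helpers below detect blend equations referencing SRC1 so the
 * blend state can be sanitized when the shader does not actually write a
 * second output.
 */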
static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)
{
   return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
          factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
          factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
}

static inline bool
anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
{
   return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
          anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
}

static void
anv_rasterization_mode(VkPolygonMode raster_mode,
                       VkLineRasterizationModeKHR line_mode,
                       float line_width,
                       uint32_t *api_mode,
                       bool *msaa_rasterization_enable)
{
   if (raster_mode == VK_POLYGON_MODE_LINE) {
      /* Unfortunately, configuring our line rasterization hardware on gfx8
       * and later is rather painful. Instead of giving us bits to tell the
       * hardware what line mode to use like we had on gfx7, we now have an
       * arcane combination of API Mode and MSAA enable bits which do things
       * in a table which are expected to magically put the hardware into the
       * right mode for your API. Sadly, Vulkan isn't any of the APIs the
       * hardware people thought of so nothing works the way you want it to.
       *
       * Look at the table titled "Multisample Rasterization Modes" in Vol 7
       * of the Skylake PRM for more details.
       */
      switch (line_mode) {
      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
         *api_mode = DX101;
#if GFX_VER <= 9
         /* Prior to ICL, the algorithm the HW uses to draw wide lines
          * doesn't quite match what the CTS expects, at least for rectangular
          * lines, so we set this to false here, making it draw parallelograms
          * instead, which work well enough.
          */
         *msaa_rasterization_enable = line_width < 1.0078125;
#else
         *msaa_rasterization_enable = true;
#endif
         break;

      case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
      case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
         *api_mode = DX9OGL;
         *msaa_rasterization_enable = false;
         break;

      default:
         unreachable("Unsupported line rasterization mode");
      }
   } else {
      *api_mode = DX101;
      *msaa_rasterization_enable = true;
   }
}

static bool
is_src1_blend_factor(enum GENX(3D_Color_Buffer_Blend_Factor) factor)
{
   return factor == BLENDFACTOR_SRC1_COLOR ||
          factor == BLENDFACTOR_SRC1_ALPHA ||
          factor == BLENDFACTOR_INV_SRC1_COLOR ||
          factor == BLENDFACTOR_INV_SRC1_ALPHA;
}

#if GFX_VERx10 == 125
/**
 * Return the dimensions of the current rendering area, defined as the
 * bounding box of all present color, depth and stencil attachments.
 */
UNUSED static bool
calculate_render_area(const struct anv_cmd_graphics_state *gfx,
                      unsigned *width, unsigned *height)
{
   *width = gfx->render_area.offset.x + gfx->render_area.extent.width;
   *height = gfx->render_area.offset.y + gfx->render_area.extent.height;

   for (unsigned i = 0; i < gfx->color_att_count; i++) {
      const struct anv_attachment *att = &gfx->color_att[i];
      if (att->iview) {
         *width = MAX2(*width, att->iview->vk.extent.width);
         *height = MAX2(*height, att->iview->vk.extent.height);
      }
   }

   const struct anv_image_view *const z_view = gfx->depth_att.iview;
   if (z_view) {
      *width = MAX2(*width, z_view->vk.extent.width);
      *height = MAX2(*height, z_view->vk.extent.height);
   }

   const struct anv_image_view *const s_view = gfx->stencil_att.iview;
   if (s_view) {
      *width = MAX2(*width, s_view->vk.extent.width);
      *height = MAX2(*height, s_view->vk.extent.height);
   }

   return *width && *height;
}

/* Calculate TBIMR tiling parameters adequate for the current pipeline
 * setup. Return true if TBIMR should be enabled.
 */
UNUSED static bool
calculate_tile_dimensions(const struct anv_device *device,
                          const struct anv_cmd_graphics_state *gfx,
                          const struct intel_l3_config *l3_config,
                          unsigned fb_width, unsigned fb_height,
                          unsigned *tile_width, unsigned *tile_height)
{
   assert(GFX_VER == 12);
   const unsigned aux_scale = ISL_MAIN_TO_CCS_SIZE_RATIO_XE;

   unsigned pixel_size = 0;

   /* Perform a rough calculation of the tile cache footprint of the
    * pixel pipeline, approximating it as the sum of the amount of
    * memory used per pixel by every render target, depth, stencil and
    * auxiliary surfaces bound to the pipeline.
    */
   for (uint32_t i = 0; i < gfx->color_att_count; i++) {
      const struct anv_attachment *att = &gfx->color_att[i];

      if (att->iview) {
         const struct anv_image *image = att->iview->image;
         const unsigned p = anv_image_aspect_to_plane(image,
                                                      VK_IMAGE_ASPECT_COLOR_BIT);
         const struct anv_image_plane *plane = &image->planes[p];

         pixel_size += intel_calculate_surface_pixel_size(
            &plane->primary_surface.isl);

         if (isl_aux_usage_has_mcs(att->aux_usage))
            pixel_size += intel_calculate_surface_pixel_size(
               &plane->aux_surface.isl);

         if (isl_aux_usage_has_ccs(att->aux_usage))
            pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
                                          &plane->primary_surface.isl),
                                       aux_scale);
      }
   }

   const struct anv_image_view *const z_view = gfx->depth_att.iview;
   if (z_view) {
      const struct anv_image *image = z_view->image;
      assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
      const unsigned p = anv_image_aspect_to_plane(image,
                                                   VK_IMAGE_ASPECT_DEPTH_BIT);
      const struct anv_image_plane *plane = &image->planes[p];

      pixel_size += intel_calculate_surface_pixel_size(
         &plane->primary_surface.isl);

      if (isl_aux_usage_has_hiz(image->planes[p].aux_usage))
         pixel_size += intel_calculate_surface_pixel_size(
            &plane->aux_surface.isl);

      if (isl_aux_usage_has_ccs(image->planes[p].aux_usage))
         pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
                                       &plane->primary_surface.isl),
                                    aux_scale);
   }

   const struct anv_image_view *const s_view = gfx->stencil_att.iview;
   if (s_view && s_view != z_view) {
      const struct anv_image *image = s_view->image;
      assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
      const unsigned p = anv_image_aspect_to_plane(image,
                                                   VK_IMAGE_ASPECT_STENCIL_BIT);
      const struct anv_image_plane *plane = &image->planes[p];

      pixel_size += intel_calculate_surface_pixel_size(
         &plane->primary_surface.isl);
   }

   if (!pixel_size)
      return false;

   /* Compute a tile layout that allows reasonable utilization of the
    * tile cache based on the per-pixel cache footprint estimated
    * above.
    */
   intel_calculate_tile_dimensions(device->info, l3_config,
                                   32, 32, fb_width, fb_height,
                                   pixel_size, tile_width, tile_height);

   /* Perform TBIMR tile passes only if the framebuffer covers more
    * than a single tile.
    */
   return *tile_width < fb_width || *tile_height < fb_height;
}
#endif

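/* The GET()/SET() helpers below shadow the packed HW state in
 * anv_gfx_dynamic_state: SET() only updates the field and flags the
 * corresponding ANV_GFX_STATE_* dirty bit when the value actually changes,
 * so unchanged state never triggers a re-emit. SET_STAGE() additionally
 * skips the dirty marking when the given shader stage is not present in the
 * pipeline.
 */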
#define GET(field) hw_state->field
#define SET(bit, field, value)                               \
   do {                                                      \
      __typeof(hw_state->field) __v = value;                 \
      if (hw_state->field != __v) {                          \
         hw_state->field = __v;                              \
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);   \
      }                                                      \
   } while (0)
#define SET_STAGE(bit, field, value, stage)                  \
   do {                                                      \
      __typeof(hw_state->field) __v = value;                 \
      if (!anv_pipeline_has_stage(pipeline,                  \
                                  MESA_SHADER_##stage)) {    \
         hw_state->field = __v;                              \
         break;                                              \
      }                                                      \
      if (hw_state->field != __v) {                          \
         hw_state->field = __v;                              \
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit);   \
      }                                                      \
   } while (0)
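/* Program the per-topology provoking vertex selects of 3DSTATE_SF /
 * 3DSTATE_CLIP from a Vulkan provoking vertex mode.
 */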
#define SETUP_PROVOKING_VERTEX(bit, cmd, mode)                     \
   switch (mode) {                                                 \
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:                 \
      SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0);     \
      SET(bit, cmd.LineStripListProvokingVertexSelect, 0);         \
      SET(bit, cmd.TriangleFanProvokingVertexSelect, 1);           \
      break;                                                       \
   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:                  \
      SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2);     \
      SET(bit, cmd.LineStripListProvokingVertexSelect, 1);         \
      SET(bit, cmd.TriangleFanProvokingVertexSelect, 2);           \
      break;                                                       \
   default:                                                        \
      unreachable("Invalid provoking vertex mode");                \
   }                                                               \

ALWAYS_INLINE static void
update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state,
                     const struct vk_dynamic_graphics_state *dyn,
                     const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data)
      return;

   /* If we have any dynamic bits here, we might need to update the value
    * in the push constant for the shader.
    */
   if (wm_prog_data->coarse_pixel_dispatch != INTEL_SOMETIMES &&
       wm_prog_data->persample_dispatch != INTEL_SOMETIMES &&
       wm_prog_data->alpha_to_coverage != INTEL_SOMETIMES)
      return;

   enum intel_msaa_flags fs_msaa_flags = INTEL_MSAA_FLAG_ENABLE_DYNAMIC;

   if (dyn->ms.rasterization_samples > 1) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_MULTISAMPLE_FBO;

      if (wm_prog_data->sample_shading) {
         assert(wm_prog_data->persample_dispatch != INTEL_NEVER);
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH;
      }
      if ((pipeline->sample_shading_enable &&
           (pipeline->min_sample_shading * dyn->ms.rasterization_samples) > 1) ||
          wm_prog_data->sample_shading) {
         fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
                          INTEL_MSAA_FLAG_PERSAMPLE_INTERP;
      }
   }

   if (wm_prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES &&
       !(fs_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH)) {
      fs_msaa_flags |= INTEL_MSAA_FLAG_COARSE_PI_MSG |
                       INTEL_MSAA_FLAG_COARSE_RT_WRITES;
   }

   if (dyn->ms.alpha_to_coverage_enable)
      fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;

   SET(FS_MSAA_FLAGS, fs_msaa_flags, fs_msaa_flags);
}

ALWAYS_INLINE static void
update_ps(struct anv_gfx_dynamic_state *hw_state,
          const struct anv_device *device,
          const struct vk_dynamic_graphics_state *dyn,
          const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data) {
#if GFX_VER < 20
      SET(PS, ps._8PixelDispatchEnable, false);
      SET(PS, ps._16PixelDispatchEnable, false);
      SET(PS, ps._32PixelDispatchEnable, false);
#else
      SET(PS, ps.Kernel0Enable, false);
      SET(PS, ps.Kernel1Enable, false);
#endif
      return;
   }

   const struct anv_shader_bin *fs_bin =
      pipeline->base.shaders[MESA_SHADER_FRAGMENT];
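   /* Let the shared helper pick the dispatch modes into a scratch
    * 3DSTATE_PS struct, then mirror the fields into hw_state through SET()
    * so the dirty tracking still works.
    */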
   struct GENX(3DSTATE_PS) ps = {};
   intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
                               MAX2(dyn->ms.rasterization_samples, 1),
                               hw_state->fs_msaa_flags);

   SET(PS, ps.KernelStartPointer0,
       fs_bin->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
   SET(PS, ps.KernelStartPointer1,
       fs_bin->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
#if GFX_VER < 20
   SET(PS, ps.KernelStartPointer2,
       fs_bin->kernel.offset +
       brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
#endif

   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
       brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
       brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
#if GFX_VER < 20
   SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
       brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
#endif

#if GFX_VER < 20
   SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable);
   SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
   SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
#else
   SET(PS, ps.Kernel0Enable, ps.Kernel0Enable);
   SET(PS, ps.Kernel1Enable, ps.Kernel1Enable);
   SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth);
   SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth);
   SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
   SET(PS, ps.Kernel0MaximumPolysperThread, ps.Kernel0MaximumPolysperThread);
#endif

   SET(PS, ps.PositionXYOffsetSelect,
       !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
       brw_wm_prog_data_is_persample(wm_prog_data,
                                     hw_state->fs_msaa_flags) ?
       POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
}

ALWAYS_INLINE static void
update_ps_extra_wm(struct anv_gfx_dynamic_state *hw_state,
                   const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   if (!wm_prog_data)
      return;

   SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
       brw_wm_prog_data_is_persample(wm_prog_data,
                                     hw_state->fs_msaa_flags));
#if GFX_VER >= 11
   const bool uses_coarse_pixel =
      brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags);
   SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel);
#endif
#if GFX_VERx10 >= 125
   /* TODO: We should only require this when the last geometry shader uses a
    * fragment shading rate that is not constant.
    */
   SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, uses_coarse_pixel);
#endif

   SET(WM, wm.BarycentricInterpolationMode,
       wm_prog_data_barycentric_modes(wm_prog_data, hw_state->fs_msaa_flags));
}

ALWAYS_INLINE static void
update_ps_extra_has_uav(struct anv_gfx_dynamic_state *hw_state,
                        const struct anv_cmd_graphics_state *gfx,
                        const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

#if GFX_VERx10 >= 125
   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
             wm_prog_data && wm_prog_data->has_side_effects,
             FRAGMENT);
#else
   /* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even if
    * 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
    * 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
    * occlusion queries with 0 attachments. There are no CTS tests exercising
    * this but zink+anv fails a bunch of tests like piglit
    * arb_framebuffer_no_attachments-query.
    *
    * Here we choose to tweak the PixelShaderHasUAV to make sure the fragment
    * shaders are run properly.
    */
   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
             wm_prog_data && (wm_prog_data->has_side_effects ||
                              (gfx->color_att_count == 0 &&
                               gfx->n_occlusion_queries > 0)),
             FRAGMENT);
#endif
}

ALWAYS_INLINE static void
update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
                            const struct vk_dynamic_graphics_state *dyn,
                            const struct anv_cmd_graphics_state *gfx,
                            const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);

   SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
             wm_prog_data && (pipeline->rp_has_ds_self_dep ||
                              has_ds_feedback_loop(dyn) ||
                              wm_prog_data->uses_kill),
             FRAGMENT);
}

#if GFX_VERx10 >= 125
ALWAYS_INLINE static void
update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state,
                          const struct vk_dynamic_graphics_state *dyn)
{
   SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
}
#endif

ALWAYS_INLINE static void
update_streamout(struct anv_gfx_dynamic_state *hw_state,
                 const struct vk_dynamic_graphics_state *dyn,
                 const struct anv_cmd_graphics_state *gfx,
                 const struct anv_graphics_pipeline *pipeline)
{
   SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
   SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);

#if INTEL_NEEDS_WA_18022508906
   /* Wa_18022508906 :
    *
    * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
    *
    * SOL_INT::Render_Enable =
    *    (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
    *    (
    *       (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
    *       !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
    *       !3DSTATE_STREAMOUT::API_Render_Disable &&
    *       (
    *          3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
    *          3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
    *          3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
    *          3DSTATE_PS_EXTRA::PS_Valid ||
    *          3DSTATE_WM::Legacy Depth_Buffer_Clear ||
    *          3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
    *          3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
    *       )
    *    )
    *
    * If SOL_INT::Render_Enable is false, the SO stage will not forward any
    * topologies down the pipeline, which is not what we want for occlusion
    * queries.
    *
    * Here we force rendering to get SOL_INT::Render_Enable when occlusion
    * queries are active.
    */
   SET(STREAMOUT, so.ForceRendering,
       (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
       Force_on : 0);
#endif
}

ALWAYS_INLINE static void
update_provoking_vertex(struct anv_gfx_dynamic_state *hw_state,
                        const struct vk_dynamic_graphics_state *dyn,
                        const struct anv_graphics_pipeline *pipeline)
{
   SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
   SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);

   switch (dyn->rs.provoking_vertex) {
   case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
      SET(STREAMOUT, so.ReorderMode, LEADING);
      SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
      break;

   case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
      SET(STREAMOUT, so.ReorderMode, TRAILING);
      SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
      break;

   default:
      unreachable("Invalid provoking vertex mode");
   }
}

ALWAYS_INLINE static void
update_topology(struct anv_gfx_dynamic_state *hw_state,
                const struct vk_dynamic_graphics_state *dyn,
                const struct anv_graphics_pipeline *pipeline)
{
   uint32_t topology =
      anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ?
      _3DPRIM_PATCHLIST(dyn->ts.patch_control_points) :
      vk_to_intel_primitive_type[dyn->ia.primitive_topology];

   SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
}

#if GFX_VER >= 11
ALWAYS_INLINE static void
update_cps(struct anv_gfx_dynamic_state *hw_state,
           const struct anv_device *device,
           const struct vk_dynamic_graphics_state *dyn,
           const struct anv_graphics_pipeline *pipeline)
{
#if GFX_VER >= 30
   SET(COARSE_PIXEL, coarse_pixel.CPSizeX,
       get_cps_size(dyn->fsr.fragment_size.width));
   SET(COARSE_PIXEL, coarse_pixel.CPSizeY,
       get_cps_size(dyn->fsr.fragment_size.height));
   SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner0Opcode,
       vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[0]]);
   SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner1Opcode,
       vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[1]]);
#elif GFX_VER >= 12
   SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
       get_cps_state_offset(device, &dyn->fsr));
#else
   STATIC_ASSERT(GFX_VER == 11);
   SET(CPS, cps.CoarsePixelShadingMode, CPS_MODE_CONSTANT);
   SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
   SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
#endif
}
#endif

ALWAYS_INLINE static void
update_te(struct anv_gfx_dynamic_state *hw_state,
          const struct vk_dynamic_graphics_state *dyn,
          const struct anv_graphics_pipeline *pipeline)
{
   const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);

   if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
      if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
         SET(TE, te.OutputTopology, tes_prog_data->output_topology);
      } else {
         /* When the origin is upper-left, we have to flip the winding order */
         if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
            SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
         } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
            SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
         } else {
            SET(TE, te.OutputTopology, tes_prog_data->output_topology);
         }
      }
   } else {
      SET(TE, te.OutputTopology, OUTPUT_POINT);
   }
}

ALWAYS_INLINE static void
update_line_width(struct anv_gfx_dynamic_state *hw_state,
                  const struct vk_dynamic_graphics_state *dyn)
{
   SET(SF, sf.LineWidth, dyn->rs.line.width);
}

ALWAYS_INLINE static void
update_sf_global_depth_bias(struct anv_gfx_dynamic_state *hw_state,
                            const struct vk_dynamic_graphics_state *dyn)
{
   /**
    * From the Vulkan Spec:
    *
    *    "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth
    *    bias representation is a factor of constant r equal to 1."
    *
    * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
    *
    *    "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
    *
    *    Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
    *
    *    Where r is the minimum representable value > 0 in the depth buffer
    *    format, converted to float32 (note: If state bit Legacy Global Depth
    *    Bias Enable is set, the r term will be forced to 1.0)"
    *
    * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
    * LegacyGlobalDepthBiasEnable.
    */
   SET(SF, sf.LegacyGlobalDepthBiasEnable,
       dyn->rs.depth_bias.representation ==
       VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
}

ALWAYS_INLINE static void
update_clip_api_mode(struct anv_gfx_dynamic_state *hw_state,
                     const struct vk_dynamic_graphics_state *dyn)
{
   SET(CLIP, clip.APIMode,
       dyn->vp.depth_clip_negative_one_to_one ?
       APIMODE_OGL : APIMODE_D3D);
}

ALWAYS_INLINE static void
update_clip_max_viewport(struct anv_gfx_dynamic_state *hw_state,
                         const struct vk_dynamic_graphics_state *dyn)
{
   /* From the Vulkan 1.0.45 spec:
    *
    *    "If the last active vertex processing stage shader entry point's
    *    interface does not include a variable decorated with ViewportIndex,
    *    then the first viewport is used."
    *
    * This could mean that we might need to set the MaximumVPIndex based on
    * the pipeline's last stage, but if the last shader doesn't write the
    * viewport index and the VUE header is used, the compiler will force the
    * value to 0 (which is what the spec requires above). Otherwise it seems
    * like the HW should be pulling 0 if the VUE header is not present.
    *
    * Avoiding a check on the pipeline seems to prevent additional emissions
    * of 3DSTATE_CLIP, which appear to impact performance on Assassin's Creed
    * Valhalla.
    */
   SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
                                  dyn->vp.viewport_count - 1 : 0);
}

ALWAYS_INLINE static void
update_clip_raster(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn,
                   const struct anv_cmd_graphics_state *gfx,
                   const struct anv_graphics_pipeline *pipeline)
{
   /* Take the dynamic primitive topology into account with:
    *    3DSTATE_RASTER::APIMode
    *    3DSTATE_RASTER::DXMultisampleRasterizationEnable
    *    3DSTATE_RASTER::AntialiasingEnable
    */
   uint32_t api_mode = 0;
   bool msaa_raster_enable = false;

   const VkLineRasterizationModeKHR line_mode =
      anv_line_rasterization_mode(dyn->rs.line.mode,
                                  dyn->ms.rasterization_samples);

   const VkPolygonMode dynamic_raster_mode =
      anv_raster_polygon_mode(pipeline,
                              dyn->rs.polygon_mode,
                              dyn->ia.primitive_topology);

   anv_rasterization_mode(dynamic_raster_mode,
                          line_mode, dyn->rs.line.width,
                          &api_mode, &msaa_raster_enable);

   /* From the Broadwell PRM, Volume 2, documentation for 3DSTATE_RASTER,
    * "Antialiasing Enable":
    *
    *    "This field must be disabled if any of the render targets have
    *    integer (UINT or SINT) surface format."
    *
    * Additionally internal documentation for Gfx12+ states:
    *
    *    "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
    *    FORCED_SAMPLE_COUNT > 1."
    */
   const bool aa_enable =
      anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
      !gfx->has_uint_rt &&
      !(GFX_VER >= 12 && gfx->samples > 1);

   const bool depth_clip_enable =
      vk_rasterization_state_depth_clip_enable(&dyn->rs);

   const bool xy_clip_test_enable =
      (dynamic_raster_mode == VK_POLYGON_MODE_FILL);

   SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);

   SET(RASTER, raster.APIMode, api_mode);
   SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
   SET(RASTER, raster.AntialiasingEnable, aa_enable);
   SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]);
   SET(RASTER, raster.FrontWinding, vk_to_intel_front_face[dyn->rs.front_face]);
   SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
   SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant_factor);
   SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope_factor);
   SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
   SET(RASTER, raster.FrontFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
   SET(RASTER, raster.BackFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
   SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
   SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
   SET(RASTER, raster.ConservativeRasterizationEnable,
       dyn->rs.conservative_mode !=
       VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
}

ALWAYS_INLINE static void
update_multisample(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn)
{
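   /* NumberofMultisamples is the log2 of the sample count;
    * __builtin_ffs(x) - 1 computes log2 for a power of two.
    */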
   SET(MULTISAMPLE, ms.NumberofMultisamples,
       __builtin_ffs(MAX2(dyn->ms.rasterization_samples, 1)) - 1);
}

ALWAYS_INLINE static void
update_sample_mask(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn)
{
   /* From the Vulkan 1.0 spec:
    *    If pSampleMask is NULL, it is treated as if the mask has all bits
    *    enabled, i.e. no coverage is removed from fragments.
    *
    * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
    */
   SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
}

ALWAYS_INLINE static void
update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state,
                        const struct vk_dynamic_graphics_state *dyn,
                        const struct anv_cmd_graphics_state *gfx,
                        const struct anv_device *device)
{
   VkImageAspectFlags ds_aspects = 0;
   if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED)
      ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
   if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED)
      ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;

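   /* Fold the dynamic depth/stencil state against the aspects actually
    * present, so tests/writes against missing attachments get disabled
    * before we program the HW.
    */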
   struct vk_depth_stencil_state opt_ds = dyn->ds;
   vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);

   SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);

   SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
       opt_ds.stencil.front.compare_mask & 0xff);
   SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
       opt_ds.stencil.front.write_mask & 0xff);

   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask,
       opt_ds.stencil.back.compare_mask & 0xff);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask,
       opt_ds.stencil.back.write_mask & 0xff);

   SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
       opt_ds.stencil.front.reference & 0xff);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
       opt_ds.stencil.back.reference & 0xff);

   SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
   SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
   SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
       vk_to_intel_compare_op[opt_ds.depth.compare_op]);
   SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
   SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable,
       opt_ds.stencil.write_enable);
   SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
       vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]);
   SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
       vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]);
   SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
       vk_to_intel_stencil_op[opt_ds.stencil.front.op.depth_fail]);
   SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
       vk_to_intel_compare_op[opt_ds.stencil.front.op.compare]);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
       vk_to_intel_stencil_op[opt_ds.stencil.back.op.fail]);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
       vk_to_intel_stencil_op[opt_ds.stencil.back.op.pass]);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
       vk_to_intel_stencil_op[opt_ds.stencil.back.op.depth_fail]);
   SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
       vk_to_intel_compare_op[opt_ds.stencil.back.op.compare]);

#if GFX_VER == 9
   const bool pma = want_stencil_pma_fix(dyn, gfx, &opt_ds);
   SET(PMA_FIX, pma_fix, pma);
#endif

#if INTEL_WA_18019816803_GFX_VER
   if (intel_needs_workaround(device->info, 18019816803)) {
      bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
      SET(WA_18019816803, ds_write_state, ds_write_state);
   }
#endif
}

ALWAYS_INLINE static void
update_depth_bounds(struct anv_gfx_dynamic_state *hw_state,
                    const struct vk_dynamic_graphics_state *dyn)
{
   SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
   /* Only look at updating the bounds if testing is enabled */
   if (dyn->ds.depth.bounds_test.enable) {
      SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
      SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
   }
}

ALWAYS_INLINE static void
update_line_stipple(struct anv_gfx_dynamic_state *hw_state,
                    const struct vk_dynamic_graphics_state *dyn)
{
   SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
   SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
       1.0f / MAX2(1, dyn->rs.line.stipple.factor));
   SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);

   SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable);
}

ALWAYS_INLINE static void
update_vf_restart(struct anv_gfx_dynamic_state *hw_state,
                  const struct vk_dynamic_graphics_state *dyn,
                  const struct anv_cmd_graphics_state *gfx)
{
   SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
   SET(VF, vf.CutIndex, gfx->restart_index);
}

ALWAYS_INLINE static void
update_blend_state(struct anv_gfx_dynamic_state *hw_state,
                   const struct vk_dynamic_graphics_state *dyn,
                   struct anv_cmd_graphics_state *gfx,
                   const struct anv_device *device,
                   bool has_fs_stage,
                   bool has_fs_dual_src)
{
   const struct anv_instance *instance = device->physical->instance;
   const uint8_t color_writes = dyn->cb.color_write_enables;
   bool has_writeable_rt =
      has_fs_stage &&
      !anv_gfx_all_color_write_masked(gfx, dyn);

   SET(BLEND_STATE, blend.AlphaToCoverageEnable,
       dyn->ms.alpha_to_coverage_enable);
   SET(BLEND_STATE, blend.AlphaToOneEnable,
       dyn->ms.alpha_to_one_enable);
   SET(BLEND_STATE, blend.ColorDitherEnable,
       gfx->rendering_flags &
       VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT);

   bool independent_alpha_blend = false;
   /* Wa_14018912822, check if we set these during RT setup. */
   bool color_blend_zero = false;
   bool alpha_blend_zero = false;
   uint32_t rt_0 = MESA_VK_ATTACHMENT_UNUSED;
   for (uint32_t rt = 0; rt < MAX_RTS; rt++) {
      if (gfx->color_output_mapping[rt] >= gfx->color_att_count)
         continue;

      uint32_t att = gfx->color_output_mapping[rt];
      /* Keep track of the RT that maps to attachment 0 */
      if (att == 0)
         rt_0 = rt;

      /* Disable anything above the current number of color attachments. */
      bool write_disabled = (color_writes & BITFIELD_BIT(att)) == 0;

      SET(BLEND_STATE, blend.rts[rt].WriteDisableAlpha,
          write_disabled ||
          (dyn->cb.attachments[att].write_mask &
           VK_COLOR_COMPONENT_A_BIT) == 0);
      SET(BLEND_STATE, blend.rts[rt].WriteDisableRed,
          write_disabled ||
          (dyn->cb.attachments[att].write_mask &
           VK_COLOR_COMPONENT_R_BIT) == 0);
      SET(BLEND_STATE, blend.rts[rt].WriteDisableGreen,
          write_disabled ||
          (dyn->cb.attachments[att].write_mask &
           VK_COLOR_COMPONENT_G_BIT) == 0);
      SET(BLEND_STATE, blend.rts[rt].WriteDisableBlue,
          write_disabled ||
          (dyn->cb.attachments[att].write_mask &
           VK_COLOR_COMPONENT_B_BIT) == 0);
      /* Vulkan specification 1.2.168, VkLogicOp:
       *
       *    "Logical operations are controlled by the logicOpEnable and
       *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
       *    logicOpEnable is VK_TRUE, then a logical operation selected by
       *    logicOp is applied between each color attachment and the
       *    fragment’s corresponding output value, and blending of all
       *    attachments is treated as if it were disabled."
       *
       * From the Broadwell PRM Volume 2d: Command Reference: Structures:
       * BLEND_STATE_ENTRY:
       *
       *    "Enabling LogicOp and Color Buffer Blending at the same time is
       *    UNDEFINED"
       *
       * The Vulkan spec also says:
       *
       *    "Logical operations are not applied to floating-point or sRGB
       *    format color attachments."
       *
       * and
       *
       *    "Any attachments using color formats for which logical
       *    operations are not supported simply pass through the color
       *    values unmodified."
       */
      bool ignores_logic_op =
         vk_format_is_float(gfx->color_att[att].vk_format) ||
         vk_format_is_srgb(gfx->color_att[att].vk_format);
      SET(BLEND_STATE, blend.rts[rt].LogicOpFunction,
          vk_to_intel_logic_op[dyn->cb.logic_op]);
      SET(BLEND_STATE, blend.rts[rt].LogicOpEnable,
          dyn->cb.logic_op_enable && !ignores_logic_op);

      SET(BLEND_STATE, blend.rts[rt].ColorClampRange, COLORCLAMP_RTFORMAT);
      SET(BLEND_STATE, blend.rts[rt].PreBlendColorClampEnable, true);
      SET(BLEND_STATE, blend.rts[rt].PostBlendColorClampEnable, true);

      /* Setup blend equation. */
      SET(BLEND_STATE, blend.rts[rt].ColorBlendFunction,
          vk_to_intel_blend_op[
             dyn->cb.attachments[att].color_blend_op]);
      SET(BLEND_STATE, blend.rts[rt].AlphaBlendFunction,
          vk_to_intel_blend_op[
             dyn->cb.attachments[att].alpha_blend_op]);

      if (dyn->cb.attachments[att].src_color_blend_factor !=
          dyn->cb.attachments[att].src_alpha_blend_factor ||
          dyn->cb.attachments[att].dst_color_blend_factor !=
          dyn->cb.attachments[att].dst_alpha_blend_factor ||
          dyn->cb.attachments[att].color_blend_op !=
          dyn->cb.attachments[att].alpha_blend_op)
         independent_alpha_blend = true;

      /* The Dual Source Blending documentation says:
       *
       *    "If SRC1 is included in a src/dst blend factor and a DualSource
       *    RT Write message is not used, results are UNDEFINED. (This
       *    reflects the same restriction in DX APIs, where undefined
       *    results are produced if “o1” is not written by a PS – there are
       *    no default values defined)."
       *
       * There is no way to gracefully fix this undefined situation so we
       * just disable the blending to prevent possible issues.
       */
      if (has_fs_stage && !has_fs_dual_src &&
          anv_is_dual_src_blend_equation(&dyn->cb.attachments[att])) {
         SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, false);
      } else {
         SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable,
             !dyn->cb.logic_op_enable &&
             dyn->cb.attachments[att].blend_enable);
      }

      /* Our hardware applies the blend factor prior to the blend function
       * regardless of what function is used. Technically, this means the
       * hardware can do MORE than GL or Vulkan specify. However, it also
       * means that, for MIN and MAX, we have to stomp the blend factor to
       * ONE to make it a no-op.
       */
      uint32_t SourceBlendFactor;
      uint32_t DestinationBlendFactor;
      uint32_t SourceAlphaBlendFactor;
      uint32_t DestinationAlphaBlendFactor;
      if (dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MIN ||
          dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MAX) {
         SourceBlendFactor = BLENDFACTOR_ONE;
         DestinationBlendFactor = BLENDFACTOR_ONE;
      } else {
         SourceBlendFactor = vk_to_intel_blend[
            dyn->cb.attachments[att].src_color_blend_factor];
         DestinationBlendFactor = vk_to_intel_blend[
            dyn->cb.attachments[att].dst_color_blend_factor];
      }

      if (dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MIN ||
          dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MAX) {
         SourceAlphaBlendFactor = BLENDFACTOR_ONE;
         DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
      } else {
         SourceAlphaBlendFactor = vk_to_intel_blend[
            dyn->cb.attachments[att].src_alpha_blend_factor];
         DestinationAlphaBlendFactor = vk_to_intel_blend[
            dyn->cb.attachments[att].dst_alpha_blend_factor];
      }
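
      /* As a concrete illustration of the comment above: with both factors
       * stomped to ONE, the HW computes MIN(src * 1, dst * 1) or
       * MAX(src * 1, dst * 1), which matches the Vulkan rule that
       * VK_BLEND_OP_MIN/MAX ignore the blend factors entirely.
       */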

      /* Replace any Src1 value with 1.0 if dual source blending is not
       * enabled.
       */
      if (has_fs_stage && !has_fs_dual_src) {
         if (is_src1_blend_factor(SourceBlendFactor))
            SourceBlendFactor = BLENDFACTOR_ONE;
         if (is_src1_blend_factor(DestinationBlendFactor))
            DestinationBlendFactor = BLENDFACTOR_ONE;
      }

      if (instance->intel_enable_wa_14018912822 &&
          intel_needs_workaround(device->info, 14018912822) &&
          dyn->ms.rasterization_samples > 1) {
         if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
            DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
            color_blend_zero = true;
         }
         if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
            DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
            alpha_blend_zero = true;
         }
      }
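
      /* This substitution is only safe because update_blend_constants()
       * below forces the corresponding blend-constant channels to 0.0f
       * whenever color/alpha_blend_zero is set, so CONST_COLOR/CONST_ALPHA
       * evaluate to exactly what ZERO would have produced.
       */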

      SET(BLEND_STATE, blend.rts[rt].SourceBlendFactor, SourceBlendFactor);
      SET(BLEND_STATE, blend.rts[rt].DestinationBlendFactor, DestinationBlendFactor);
      SET(BLEND_STATE, blend.rts[rt].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
      SET(BLEND_STATE, blend.rts[rt].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
   }
   gfx->color_blend_zero = color_blend_zero;
   gfx->alpha_blend_zero = alpha_blend_zero;

   SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);

   if (rt_0 == MESA_VK_ATTACHMENT_UNUSED)
      rt_0 = 0;

   /* 3DSTATE_PS_BLEND to be consistent with the rest of the
    * BLEND_STATE_ENTRY.
    */
   SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
   SET(PS_BLEND, ps_blend.ColorBufferBlendEnable,
       GET(blend.rts[rt_0].ColorBufferBlendEnable));
   SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor,
       GET(blend.rts[rt_0].SourceAlphaBlendFactor));
   SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor,
       gfx->alpha_blend_zero ?
       BLENDFACTOR_CONST_ALPHA :
       GET(blend.rts[rt_0].DestinationAlphaBlendFactor));
   SET(PS_BLEND, ps_blend.SourceBlendFactor,
       GET(blend.rts[rt_0].SourceBlendFactor));
   SET(PS_BLEND, ps_blend.DestinationBlendFactor,
       gfx->color_blend_zero ?
       BLENDFACTOR_CONST_COLOR :
       GET(blend.rts[rt_0].DestinationBlendFactor));
   SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
   SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable,
       GET(blend.IndependentAlphaBlendEnable));
   SET(PS_BLEND, ps_blend.AlphaToCoverageEnable,
       dyn->ms.alpha_to_coverage_enable);
}

ALWAYS_INLINE static void
update_blend_constants(struct anv_gfx_dynamic_state *hw_state,
                       const struct vk_dynamic_graphics_state *dyn,
                       const struct anv_cmd_graphics_state *gfx)
{
   SET(CC_STATE, cc.BlendConstantColorRed,
       gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]);
   SET(CC_STATE, cc.BlendConstantColorGreen,
       gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]);
   SET(CC_STATE, cc.BlendConstantColorBlue,
       gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]);
   SET(CC_STATE, cc.BlendConstantColorAlpha,
       gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]);
}

ALWAYS_INLINE static void
update_viewports(struct anv_gfx_dynamic_state *hw_state,
                 const struct vk_dynamic_graphics_state *dyn,
                 const struct anv_cmd_graphics_state *gfx,
                 const struct anv_device *device)
{
   const struct anv_instance *instance = device->physical->instance;
   const VkViewport *viewports = dyn->vp.viewports;

   const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;

   for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
      const VkViewport *vp = &viewports[i];

      /* The gfx7 state struct has just the matrix and guardband fields, the
       * gfx8 struct adds the min/max viewport fields.
       */
      struct GENX(SF_CLIP_VIEWPORT) sfv = {
         .ViewportMatrixElementm00 = vp->width / 2,
         .ViewportMatrixElementm11 = vp->height / 2,
         .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
         .ViewportMatrixElementm30 = vp->x + vp->width / 2,
         .ViewportMatrixElementm31 = vp->y + vp->height / 2,
         .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
            (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
         .XMinClipGuardband = -1.0f,
         .XMaxClipGuardband = 1.0f,
         .YMinClipGuardband = -1.0f,
         .YMaxClipGuardband = 1.0f,
         .XMinViewPort = vp->x,
         .XMaxViewPort = vp->x + vp->width - 1,
         .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
         .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
      };
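
      /* The matrix above is the usual NDC-to-window transform:
       *
       *    x_window = m00 * x_ndc + m30
       *    y_window = m11 * y_ndc + m31
       *    z_window = m22 * z_ndc + m32
       *
       * With depth_clip_negative_one_to_one (GL-style [-1, 1] Z), the 0.5
       * scale halves m22 and m32 becomes the midpoint of the depth range.
       */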

      /* Fix depth test misrenderings by lowering translated depth range */
      if (instance->lower_depth_range_rate != 1.0f)
         sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;

      const uint32_t fb_size_max = 1 << 14;
      uint32_t x_min = 0, x_max = fb_size_max;
      uint32_t y_min = 0, y_max = fb_size_max;

      /* If we have a valid renderArea, include that */
      if (gfx->render_area.extent.width > 0 &&
          gfx->render_area.extent.height > 0) {
         x_min = MAX2(x_min, gfx->render_area.offset.x);
         x_max = MIN2(x_max, gfx->render_area.offset.x +
                             gfx->render_area.extent.width);
         y_min = MAX2(y_min, gfx->render_area.offset.y);
         y_max = MIN2(y_max, gfx->render_area.offset.y +
                             gfx->render_area.extent.height);
      }

      /* The client is required to have enough scissors for whatever it sets
       * as ViewportIndex, but it's possible that they've got more viewports
       * set from a previous command. Also, from the Vulkan 1.3.207 spec:
       *
       *    "The application must ensure (using scissor if necessary) that
       *    all rendering is contained within the render area."
       *
       * If the client doesn't set a scissor, that basically means it
       * guarantees everything is in-bounds already. If we end up using a
       * guardband of [-1, 1] in that case, there shouldn't be much loss.
       * It's theoretically possible that they could do all their clipping
       * with clip planes, but that'd be a bit odd.
       */
      if (i < dyn->vp.scissor_count) {
         const VkRect2D *scissor = &dyn->vp.scissors[i];
         x_min = MAX2(x_min, scissor->offset.x);
         x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
         y_min = MAX2(y_min, scissor->offset.y);
         y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
      }

      /* Only bother calculating the guardband if our known render area is
       * less than the maximum size. Otherwise, it will calculate [-1, 1]
       * anyway but possibly with precision loss.
       */
      if (x_min > 0 || x_max < fb_size_max ||
          y_min > 0 || y_max < fb_size_max) {
         intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
                                        sfv.ViewportMatrixElementm00,
                                        sfv.ViewportMatrixElementm11,
                                        sfv.ViewportMatrixElementm30,
                                        sfv.ViewportMatrixElementm31,
                                        &sfv.XMinClipGuardband,
                                        &sfv.XMaxClipGuardband,
                                        &sfv.YMinClipGuardband,
                                        &sfv.YMaxClipGuardband);
      }
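
      /* The guardband is expressed in NDC units: primitives that fall
       * entirely within it can be trivially accepted by the clipper and cut
       * down by the scissor/render area instead, so a guardband sized to
       * the known render area keeps full clipping work to a minimum.
       */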

#define SET_VP(bit, state, field)                                     \
      do {                                                            \
         if (hw_state->state.field != sfv.field) {                    \
            hw_state->state.field = sfv.field;                        \
            BITSET_SET(hw_state->dirty,                               \
                       ANV_GFX_STATE_##bit);                          \
         }                                                            \
      } while (0)
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
      SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
#undef SET_VP

      const bool depth_range_unrestricted =
         device->vk.enabled_extensions.EXT_depth_range_unrestricted;

      float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0;
      float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0;

      float min_depth = dyn->rs.depth_clamp_enable ?
                        MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit;
      float max_depth = dyn->rs.depth_clamp_enable ?
                        MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit;

      if (dyn->rs.depth_clamp_enable &&
          dyn->vp.depth_clamp_mode == VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT) {
         min_depth = dyn->vp.depth_clamp_range.minDepthClamp;
         max_depth = dyn->vp.depth_clamp_range.maxDepthClamp;
      }
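
      /* The MIN2/MAX2 above handle reversed depth ranges (minDepth >
       * maxDepth), while the user-defined range from
       * VK_EXT_depth_clamp_control overrides the viewport-derived clamp
       * when requested.
       */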

      SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
      SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
   }

   /* If the HW state is already considered dirty or the previously
    * programmed viewport count is smaller than what we need, update the
    * viewport count and ensure the HW state is dirty. Otherwise, if the
    * number of viewports programmed previously was larger than what we
    * need now, there is no need to reemit: we can just keep the old
    * programmed values.
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
       hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
      hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
   }
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
       hw_state->vp_cc.count < dyn->vp.viewport_count) {
      hw_state->vp_cc.count = dyn->vp.viewport_count;
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
   }
}

ALWAYS_INLINE static void
update_scissors(struct anv_gfx_dynamic_state *hw_state,
                const struct vk_dynamic_graphics_state *dyn,
                const struct anv_cmd_graphics_state *gfx,
                VkCommandBufferLevel cmd_buffer_level)
{
   const VkRect2D *scissors = dyn->vp.scissors;
   const VkViewport *viewports = dyn->vp.viewports;

   for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
      const VkRect2D *s = &scissors[i];
      const VkViewport *vp = &viewports[i];

      const int max = 0xffff;

      uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
      uint32_t x_min = MAX2(s->offset.x, vp->x);
      int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
                           MAX2(vp->y, vp->y + vp->height) - 1);
      int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
                           vp->x + vp->width - 1);

      y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
      x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
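
      /* INT16_MAX >> 1 == 16383, the largest coordinate of a 16K x 16K
       * framebuffer (matching the fb_size_max used for the guardband
       * computation above), presumably chosen to keep the values within
       * the SCISSOR_RECT field range.
       */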

      /* Do this math using int64_t so overflow gets clamped correctly. */
      if (cmd_buffer_level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
         y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
         x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
         y_max = CLAMP((uint64_t) y_max, 0,
                       gfx->render_area.offset.y +
                       gfx->render_area.extent.height - 1);
         x_max = CLAMP((uint64_t) x_max, 0,
                       gfx->render_area.offset.x +
                       gfx->render_area.extent.width - 1);
      }

      if (s->extent.width <= 0 || s->extent.height <= 0) {
         /* Since xmax and ymax are inclusive, we have to have xmax < xmin
          * or ymax < ymin for empty clips. In case clip x, y, width, and
          * height are all 0, the clamps below produce 0 for xmin, ymin,
          * xmax, ymax, which isn't what we want. Just special case empty
          * clips and produce a canonical empty clip.
          */
         SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
      } else {
         SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
         SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
      }
   }

   /* If the HW state is already considered dirty or the previously
    * programmed scissor count is smaller than what we need, update the
    * scissor count and ensure the HW state is dirty. Otherwise, if the
    * number of scissors programmed previously was larger than what we
    * need now, there is no need to reemit: we can just keep the old
    * programmed values.
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
       hw_state->scissor.count < dyn->vp.scissor_count) {
      hw_state->scissor.count = dyn->vp.scissor_count;
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
   }
}

#if GFX_VERx10 == 125
ALWAYS_INLINE static void
update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
                  const struct anv_device *device,
                  const struct anv_cmd_graphics_state *gfx,
                  const struct intel_l3_config *l3_config)
{
   unsigned fb_width, fb_height, tile_width, tile_height;

   if (device->physical->instance->enable_tbimr &&
       calculate_render_area(gfx, &fb_width, &fb_height) &&
       calculate_tile_dimensions(device, gfx, l3_config,
                                 fb_width, fb_height,
                                 &tile_width, &tile_height)) {
      /* Use a batch size of 128 polygons per slice as recommended by
       * BSpec 68436 "TBIMR Programming".
       */
      const unsigned num_slices = device->info->num_slices;
      const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256;
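
      /* For example, on a hypothetical 4-slice configuration this yields
       * batch_size = 512, which the TBIMRBatchSize field below encodes as
       * log2(512) - 5 = 4 (the field appears to be a power-of-two exponent
       * biased so that 0 means 32).
       */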

      SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height);
      SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width);
      SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount,
          DIV_ROUND_UP(fb_height, tile_height));
      SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount,
          DIV_ROUND_UP(fb_width, tile_width));
      SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize,
          util_logbase2(batch_size) - 5);
      SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true);
      SET(TBIMR_TILE_PASS_INFO, use_tbimr, true);
   } else {
      hw_state->use_tbimr = false;
   }
}
#endif

/**
 * This function takes the vulkan runtime values & dirty states and updates
 * the values in anv_gfx_dynamic_state, flagging HW instructions for
 * reemission if the values are changing.
 *
 * Nothing is emitted in the batch buffer.
 */
static void
cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
                                   const struct anv_device *device,
                                   const struct vk_dynamic_graphics_state *dyn,
                                   struct anv_cmd_graphics_state *gfx,
                                   const struct anv_graphics_pipeline *pipeline,
                                   VkCommandBufferLevel cmd_buffer_level)
{
   UNUSED bool fs_msaa_changed = false;
   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
      update_fs_msaa_flags(hw_state, dyn, pipeline);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
      update_ps(hw_state, device, dyn, pipeline);
      update_ps_extra_wm(hw_state, pipeline);
   }

   if (gfx->dirty &
#if GFX_VERx10 >= 125
       ANV_CMD_DIRTY_PIPELINE
#else
       (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)
#endif
       )
      update_ps_extra_has_uav(hw_state, gfx, pipeline);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE))
      update_ps_extra_kills_pixel(hw_state, dyn, gfx, pipeline);

   if ((gfx->dirty & ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
      update_streamout(hw_state, dyn, gfx, pipeline);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
      update_provoking_vertex(hw_state, dyn, pipeline);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY))
      update_topology(hw_state, dyn, pipeline);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);

#if GFX_VER >= 11
   if (device->vk.enabled_extensions.KHR_fragment_shading_rate &&
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
      update_cps(hw_state, device, dyn, pipeline);
#endif /* GFX_VER >= 11 */

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
      update_te(hw_state, dyn, pipeline);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
      update_line_width(hw_state, dyn);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS))
      update_sf_global_depth_bias(hw_state, dyn);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
      update_clip_api_mode(hw_state, dyn);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
      update_clip_max_viewport(hw_state, dyn);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE))
      update_clip_raster(hw_state, dyn, gfx, pipeline);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES))
      update_multisample(hw_state, dyn);

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK))
      update_sample_mask(hw_state, dyn);

   if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
#if GFX_VER == 9
       /* For the PMA fix */
       (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
#endif
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE))
      update_wm_depth_stencil(hw_state, dyn, gfx, device);

#if GFX_VER >= 12
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS))
      update_depth_bounds(hw_state, dyn);
#endif

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
      update_line_stipple(hw_state, dyn);

   if ((gfx->dirty & ANV_CMD_DIRTY_RESTART_INDEX) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
      update_vf_restart(hw_state, dyn, gfx);

   if (gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER)
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);

#if GFX_VERx10 >= 125
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
      update_vfg_list_cut_index(hw_state, dyn);
#endif

   if (device->vk.enabled_extensions.EXT_sample_locations &&
       (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);

   if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
       (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
      update_blend_state(hw_state, dyn, gfx, device,
                         wm_prog_data != NULL,
                         wm_prog_data != NULL ?
                         wm_prog_data->dual_src_blend : false);
   }

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
      update_blend_constants(hw_state, dyn, gfx);

   if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLAMP_RANGE))
      update_viewports(hw_state, dyn, gfx, device);

   if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS))
      update_scissors(hw_state, dyn, gfx, cmd_buffer_level);

#if GFX_VERx10 == 125
   if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS))
      update_tbimr_info(hw_state, device, gfx, pipeline->base.base.l3_config);
#endif

#if INTEL_WA_14018283232_GFX_VER
   if (intel_needs_workaround(device->info, 14018283232) &&
       ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))) {
      const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
      SET(WA_14018283232, wa_14018283232_toggle,
          dyn->ds.depth.bounds_test.enable &&
          wm_prog_data &&
          wm_prog_data->uses_kill);
   }
#endif

   /* If the pipeline uses a dynamic value of patch_control_points and
    * either the pipeline or the dynamic value changed, check the value and
    * reemit if needed.
    */
   if (pipeline->dynamic_patch_control_points &&
       ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
        BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
      SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
}

#undef GET
#undef SET
#undef SET_STAGE
#undef SETUP_PROVOKING_VERTEX

/**
 * Entry point wrapping cmd_buffer_flush_gfx_runtime_state(): updates the
 * values in anv_gfx_dynamic_state from the vulkan runtime values & dirty
 * states, then clears the runtime dirty states.
 *
 * Nothing is emitted in the batch buffer.
 */
void
genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
{
   cmd_buffer_flush_gfx_runtime_state(
      &cmd_buffer->state.gfx.dyn_state,
      cmd_buffer->device,
      &cmd_buffer->vk.dynamic_graphics_state,
      &cmd_buffer->state.gfx,
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
      cmd_buffer->vk.level);

   vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
}

static void
emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
{
   /* For Wa_16012775297, ensure VF_STATISTICS is emitted before 3DSTATE_VF
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
#if GFX_VERx10 >= 125
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
      vfg.DistributionMode = RR_STRICT;
   }
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
      vf.GeometryDistributionEnable = true;
   }
#endif

#if GFX_VER >= 12
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
      pr.ReplicaMask = 1;
   }
#endif

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) {
      rr.CullMode = CULLMODE_NONE;
      rr.FrontFaceFillMode = FILL_MODE_SOLID;
      rr.BackFaceFillMode = FILL_MODE_SOLID;
   }

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);

#if GFX_VER >= 11
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero);
#endif

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) {
      clip.ClipEnable = true;
      clip.ClipMode = CLIPMODE_REJECT_ALL;
   }

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero);
   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero);

   uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2,
                                               GENX(3DSTATE_VERTEX_ELEMENTS));
   uint32_t *ve_pack_dest = &vertex_elements[1];

   for (int i = 0; i < 2; i++) {
      struct GENX(VERTEX_ELEMENT_STATE) element = {
         .Valid = true,
         .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
         .Component0Control = VFCOMP_STORE_0,
         .Component1Control = VFCOMP_STORE_0,
         .Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
         .Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
      };
      GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element);
      ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
   }
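
   /* No vertex buffers are bound: element 0 reads back as (0, 0, 0, 0) and
    * element 1 as (0, 0, 1.0, 1.0), i.e. a degenerate but well-defined
    * position-style input for the dummy triangles below.
    */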

   anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
      topo.PrimitiveTopologyType = _3DPRIM_TRILIST;
   }

   /* Emit dummy draw per slice. */
   for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
         prim.VertexCountPerInstance = 3;
         prim.PrimitiveTopologyType = _3DPRIM_TRILIST;
         prim.InstanceCount = 1;
         prim.VertexAccessType = SEQUENTIAL;
      }
   }
}

#if INTEL_WA_14018283232_GFX_VER
void
genX(batch_emit_wa_14018283232)(struct anv_batch *batch)
{
   anv_batch_emit(batch, GENX(RESOURCE_BARRIER), barrier) {
      barrier.ResourceBarrierBody = (struct GENX(RESOURCE_BARRIER_BODY)) {
         .BarrierType = RESOURCE_BARRIER_TYPE_IMMEDIATE,
         .SignalStage = RESOURCE_BARRIER_STAGE_COLOR,
         .WaitStage = RESOURCE_BARRIER_STAGE_PIXEL,
      };
   }
}
#endif

/**
 * This function handles dirty state emission to the batch buffer.
 */
static void
cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(gfx->base.pipeline);
   const struct vk_dynamic_graphics_state *dyn =
      &cmd_buffer->vk.dynamic_graphics_state;
   struct anv_push_constants *push_consts =
      &cmd_buffer->state.gfx.base.push_constants;
   struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
   const bool protected = cmd_buffer->vk.pool->flags &
                          VK_COMMAND_POOL_CREATE_PROTECTED_BIT;

#if INTEL_WA_16011107343_GFX_VER
   /* Will be emitted in front of every draw instead */
   if (intel_needs_workaround(device->info, 16011107343) &&
       anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
      BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
#endif

#if INTEL_WA_22018402687_GFX_VER
   /* Will be emitted in front of every draw instead */
   if (intel_needs_workaround(device->info, 22018402687) &&
       anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
#endif

   /*
    * Values provided by push constants
    */

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) {
      push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
      gfx->base.push_constants_data_dirty = true;
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
      push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags;
      cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
      gfx->base.push_constants_data_dirty = true;
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
      genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);

      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);

      memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
             sizeof(struct intel_urb_config));
   }
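
   /* The memcpy above keeps a copy of the last programmed URB
    * configuration so that subsequent emissions (and presumably the URB
    * workaround itself) can compare against what the hardware actually
    * has, rather than against pipeline state that may since have been
    * replaced.
    */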

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);

#if GFX_VER >= 11
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
#endif

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
      anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
                                              final.vs, protected);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) {
      anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
                                              final.hs, protected);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) {
      anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
                                              final.ds, protected);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
         vfs.StatisticsEnable = true;
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
      /* Wa_16011773973:
       * If SOL is enabled and SO_DECL state has to be programmed,
       *    1. Send 3D State SOL state with SOL disabled
       *    2. Send SO_DECL NP state
       *    3. Send 3D State SOL with SOL Enabled
       */
      if (intel_needs_workaround(device->info, 16011773973) &&
          pipeline->uses_xfb)
         anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);

      anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
                                    final.so_decl_list);

#if GFX_VER >= 11 && GFX_VER < 20
      /* ICL PRMs, Volume 2a - Command Reference: Instructions,
       * 3DSTATE_SO_DECL_LIST:
       *
       *    "Workaround: This command must be followed by a PIPE_CONTROL
       *     with CS Stall bit set."
       *
       * On DG2+ also known as Wa_1509820217.
       */
      genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_CS_STALL_BIT);
#endif
   }

   if (device->vk.enabled_extensions.EXT_mesh_shader) {
      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) {
         anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
                                                 final.mesh_control, protected);
      }

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) {
         anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
                                                 final.task_control, protected);
      }

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);

      if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
         anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
   } else {
      assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
             !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
   }

#define INIT(category, name) \
   .name = hw_state->category.name
#define SET(s, category, name) \
   s.name = hw_state->category.name
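
/* INIT() seeds a field of a packed state struct from the value shadowed in
 * hw_state, while SET() copies a shadowed value into an instruction being
 * emitted; together they keep the emission below in sync with the values
 * tracked by cmd_buffer_flush_gfx_runtime_state().
 */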

   /* Now the potentially dynamic instructions */

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
      anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS),
                                     pipeline, partial.ps, ps, protected) {
         SET(ps, ps, KernelStartPointer0);
         SET(ps, ps, KernelStartPointer1);
         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);

#if GFX_VER < 20
         SET(ps, ps, KernelStartPointer2);
         SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);

         SET(ps, ps, _8PixelDispatchEnable);
         SET(ps, ps, _16PixelDispatchEnable);
         SET(ps, ps, _32PixelDispatchEnable);
#else
         SET(ps, ps, Kernel0Enable);
         SET(ps, ps, Kernel1Enable);
         SET(ps, ps, Kernel0SIMDWidth);
         SET(ps, ps, Kernel1SIMDWidth);
         SET(ps, ps, Kernel0PolyPackingPolicy);
         SET(ps, ps, Kernel0MaximumPolysperThread);
#endif
         SET(ps, ps, PositionXYOffsetSelect);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) ||
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
                           pipeline, partial.ps_extra, pse) {
         SET(pse, ps_extra, PixelShaderHasUAV);
         SET(pse, ps_extra, PixelShaderIsPerSample);
#if GFX_VER >= 11
         SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
#endif
         SET(pse, ps_extra, PixelShaderKillsPixel);

#if INTEL_WA_18038825448_GFX_VER
         /* Add a dependency if either the shader needs it (because of a
          * runtime change through a pre-rasterization shader) or if we
          * notice a change.
          */
         pse.EnablePSDependencyOnCPsizeChange =
            hw_state->ps_extra.EnablePSDependencyOnCPsizeChange ||
            BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE);
#elif GFX_VERx10 >= 125
         SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange);
#endif
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
                           pipeline, partial.clip, clip) {
         SET(clip, clip, APIMode);
         SET(clip, clip, ViewportXYClipTestEnable);
         SET(clip, clip, TriangleStripListProvokingVertexSelect);
         SET(clip, clip, LineStripListProvokingVertexSelect);
         SET(clip, clip, TriangleFanProvokingVertexSelect);
         SET(clip, clip, MaximumVPIndex);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
      genX(streamout_prologue)(cmd_buffer);

      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
                           pipeline, partial.so, so) {
         SET(so, so, RenderingDisable);
         SET(so, so, RenderStreamSelect);
         SET(so, so, ReorderMode);
         SET(so, so, ForceRendering);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
      struct anv_state sf_clip_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->vp_sf_clip.count * 64, 64);
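
      /* Each SF_CLIP_VIEWPORT entry is 16 dwords (64 bytes), hence the
       * 64-byte stride and alignment of the array allocated above.
       */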

      for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
         struct GENX(SF_CLIP_VIEWPORT) sfv = {
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
            INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
            INIT(vp_sf_clip.elem[i], XMinClipGuardband),
            INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
            INIT(vp_sf_clip.elem[i], YMinClipGuardband),
            INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
            INIT(vp_sf_clip.elem[i], XMinViewPort),
            INIT(vp_sf_clip.elem[i], XMaxViewPort),
            INIT(vp_sf_clip.elem[i], YMinViewPort),
            INIT(vp_sf_clip.elem[i], YMaxViewPort),
         };
         GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
      }

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
         clip.SFClipViewportPointer = sf_clip_state.offset;
      }
   }

   /* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming
    * 3DSTATE_VIEWPORT_STATE_POINTERS_CC:
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
       (GFX_VER == 9 &&
        BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) {
      hw_state->vp_cc.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->vp_cc.count * 8, 32);

      for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
         struct GENX(CC_VIEWPORT) cc_viewport = {
            INIT(vp_cc.elem[i], MinimumDepth),
            INIT(vp_cc.elem[i], MaximumDepth),
         };
         GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8,
                                &cc_viewport);
      }

      /* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC
       * below.
       */
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
         cc.CCViewportPointer = hw_state->vp_cc.state.offset;
      }
      cmd_buffer->state.gfx.viewport_set = true;
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
      /* Wa_1409725701:
       *
       *    "The viewport-specific state used by the SF unit (SCISSOR_RECT)
       *     is stored as an array of up to 16 elements. The location of
       *     first element of the array, as specified by Pointer to
       *     SCISSOR_RECT, should be aligned to a 64-byte boundary."
       */
      struct anv_state scissor_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            hw_state->scissor.count * 8, 64);

      for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
         struct GENX(SCISSOR_RECT) scissor = {
            INIT(scissor.elem[i], ScissorRectangleYMin),
            INIT(scissor.elem[i], ScissorRectangleXMin),
            INIT(scissor.elem[i], ScissorRectangleYMax),
            INIT(scissor.elem[i], ScissorRectangleXMax),
         };
         GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
      }

      anv_batch_emit(&cmd_buffer->batch,
                     GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
         ssp.ScissorRectPointer = scissor_state.offset;
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
         SET(vft, vft, PrimitiveTopologyType);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
      genX(batch_emit_vertex_input)(&cmd_buffer->batch, device,
                                    pipeline, dyn->vi);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
                           pipeline, partial.te, te) {
         SET(te, te, OutputTopology);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
      anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS),
                                     pipeline, partial.gs, gs, protected) {
         SET(gs, gs, ReorderMode);
      }
   }

#if GFX_VER >= 30
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) {
         coarse_pixel.DisableCPSPointers = true;
         SET(coarse_pixel, coarse_pixel, CPSizeX);
         SET(coarse_pixel, coarse_pixel, CPSizeY);
         SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode);
         SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode);
      }
   }
#else
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
#if GFX_VER == 11
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
         SET(cps, cps, CoarsePixelShadingMode);
         SET(cps, cps, MinCPSizeX);
         SET(cps, cps, MinCPSizeY);
      }
#elif GFX_VER >= 12
      /* TODO: we can optimize this flush in the following cases:
       *
       * In the case where the last geometry shader emits a value that is
       * not constant, we can avoid this stall because we can synchronize
       * the pixel shader internally with
       * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
       *
       * If we know that the previous pipeline and the current one are
       * using the same fragment shading rate.
       */
      anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
#if GFX_VERx10 >= 125
         pc.PSSStallSyncEnable = true;
#else
         pc.PSDSyncEnable = true;
#endif
      }

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
         SET(cps, cps, CoarsePixelShadingStateArrayPointer);
      }
#endif
   }
#endif /* GFX_VER >= 30 */

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
                           pipeline, partial.sf, sf) {
         SET(sf, sf, LineWidth);
         SET(sf, sf, TriangleStripListProvokingVertexSelect);
         SET(sf, sf, LineStripListProvokingVertexSelect);
         SET(sf, sf, TriangleFanProvokingVertexSelect);
         SET(sf, sf, LegacyGlobalDepthBiasEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), raster) {
         /* For details on 3DSTATE_RASTER multisample state, see the BSpec
          * table "Multisample Modes State".
          *
          * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix
          * computations. If we ever set this bit to a different value,
          * they will need to be updated accordingly.
          */
         raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
         raster.ForceMultisampling = false;
         raster.ScissorRectangleEnable = true;

         SET(raster, raster, APIMode);
         SET(raster, raster, DXMultisampleRasterizationEnable);
         SET(raster, raster, AntialiasingEnable);
         SET(raster, raster, CullMode);
         SET(raster, raster, FrontWinding);
         SET(raster, raster, GlobalDepthOffsetEnableSolid);
         SET(raster, raster, GlobalDepthOffsetEnableWireframe);
         SET(raster, raster, GlobalDepthOffsetEnablePoint);
         SET(raster, raster, GlobalDepthOffsetConstant);
         SET(raster, raster, GlobalDepthOffsetScale);
         SET(raster, raster, GlobalDepthOffsetClamp);
         SET(raster, raster, FrontFaceFillMode);
         SET(raster, raster, BackFaceFillMode);
         SET(raster, raster, ViewportZFarClipTestEnable);
         SET(raster, raster, ViewportZNearClipTestEnable);
         SET(raster, raster, ConservativeRasterizationEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
         ms.PixelLocation = CENTER;

         /* The PRM says that this bit is valid only for DX9:
          *
          *    "SW can choose to set this bit only for DX9 API. DX10/OGL
          *     API's should not have any effect by setting or not setting
          *     this bit."
          */
         ms.PixelPositionOffsetEnable = false;

         SET(ms, ms, NumberofMultisamples);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
      hw_state->cc.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         INIT(cc, BlendConstantColorRed),
         INIT(cc, BlendConstantColorGreen),
         INIT(cc, BlendConstantColorBlue),
         INIT(cc, BlendConstantColorAlpha),
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc);

      /* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below. */
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = hw_state->cc.state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
         SET(sm, sm, SampleMask);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         SET(ds, ds, DoubleSidedStencilEnable);
         SET(ds, ds, StencilTestMask);
         SET(ds, ds, StencilWriteMask);
         SET(ds, ds, BackfaceStencilTestMask);
         SET(ds, ds, BackfaceStencilWriteMask);
         SET(ds, ds, StencilReferenceValue);
         SET(ds, ds, BackfaceStencilReferenceValue);
         SET(ds, ds, DepthTestEnable);
         SET(ds, ds, DepthBufferWriteEnable);
         SET(ds, ds, DepthTestFunction);
         SET(ds, ds, StencilTestEnable);
         SET(ds, ds, StencilBufferWriteEnable);
         SET(ds, ds, StencilFailOp);
         SET(ds, ds, StencilPassDepthPassOp);
         SET(ds, ds, StencilPassDepthFailOp);
         SET(ds, ds, StencilTestFunction);
         SET(ds, ds, BackfaceStencilFailOp);
         SET(ds, ds, BackfaceStencilPassDepthPassOp);
         SET(ds, ds, BackfaceStencilPassDepthFailOp);
         SET(ds, ds, BackfaceStencilTestFunction);
      }
   }

#if GFX_VER >= 12
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
         SET(db, db, DepthBoundsTestEnable);
         SET(db, db, DepthBoundsTestMinValue);
         SET(db, db, DepthBoundsTestMaxValue);
      }
   }
#endif

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         SET(ls, ls, LineStipplePattern);
         SET(ls, ls, LineStippleInverseRepeatCount);
         SET(ls, ls, LineStippleRepeatCount);
      }
#if GFX_VER >= 11
      /* ICL PRMs, Volume 2a - Command Reference: Instructions,
       * 3DSTATE_LINE_STIPPLE:
       *
       *    "Workaround: This command must be followed by a PIPE_CONTROL
       *     with CS Stall bit set."
       */
      genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_CS_STALL_BIT);
#endif
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
         vf.GeometryDistributionEnable = true;
#endif
         SET(vf, vf, IndexedDrawCutIndexEnable);
         SET(vf, vf, CutIndex);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
      struct anv_buffer *buffer = gfx->index_buffer;
      uint32_t offset = gfx->index_offset;
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
         ib.IndexFormat = gfx->index_type;
         ib.MOCS = anv_mocs(device,
                            buffer ? buffer->address.bo : NULL,
                            ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
         ib.L3BypassDisable = true;
#endif
         if (buffer) {
            ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
            ib.BufferSize = gfx->index_size;
         }
      }
   }

#if GFX_VERx10 >= 125
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
                           pipeline, partial.vfg, vfg) {
         SET(vfg, vfg, ListCutIndexEnable);
      }
   }
#endif

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
      genX(emit_sample_pattern)(&cmd_buffer->batch,
                                dyn->ms.sample_locations_enable ?
                                dyn->ms.sample_locations : NULL);
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
      anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
                           pipeline, partial.wm, wm) {
         SET(wm, wm, LineStippleEnable);
         SET(wm, wm, BarycentricInterpolationMode);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
         SET(blend, ps_blend, HasWriteableRT);
         SET(blend, ps_blend, ColorBufferBlendEnable);
         SET(blend, ps_blend, SourceAlphaBlendFactor);
         SET(blend, ps_blend, DestinationAlphaBlendFactor);
         SET(blend, ps_blend, SourceBlendFactor);
         SET(blend, ps_blend, DestinationBlendFactor);
         SET(blend, ps_blend, AlphaTestEnable);
         SET(blend, ps_blend, IndependentAlphaBlendEnable);
         SET(blend, ps_blend, AlphaToCoverageEnable);
      }
   }

   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
      const uint32_t num_dwords = GENX(BLEND_STATE_length) +
                                  GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
      hw_state->blend.state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            num_dwords * 4,
                                            64);
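
      /* The layout is a single BLEND_STATE header immediately followed by
       * one BLEND_STATE_ENTRY per possible render target, matching the
       * num_dwords computation above.
       */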
2729
2730 uint32_t *dws = hw_state->blend.state.map;
2731
2732 struct GENX(BLEND_STATE) blend_state = {
2733 INIT(blend, AlphaToCoverageEnable),
2734 INIT(blend, AlphaToOneEnable),
2735 INIT(blend, IndependentAlphaBlendEnable),
2736 INIT(blend, ColorDitherEnable),
2737 };
2738 GENX(BLEND_STATE_pack)(NULL, dws, &blend_state);
2739
2740 /* Jump to blend entries. */
2741 dws += GENX(BLEND_STATE_length);
2742 for (uint32_t i = 0; i < MAX_RTS; i++) {
2743 struct GENX(BLEND_STATE_ENTRY) entry = {
2744 INIT(blend.rts[i], WriteDisableAlpha),
2745 INIT(blend.rts[i], WriteDisableRed),
2746 INIT(blend.rts[i], WriteDisableGreen),
2747 INIT(blend.rts[i], WriteDisableBlue),
2748 INIT(blend.rts[i], LogicOpFunction),
2749 INIT(blend.rts[i], LogicOpEnable),
2750 INIT(blend.rts[i], ColorBufferBlendEnable),
2751 INIT(blend.rts[i], ColorClampRange),
2752 INIT(blend.rts[i], PreBlendColorClampEnable),
2753 INIT(blend.rts[i], PostBlendColorClampEnable),
2754 INIT(blend.rts[i], SourceBlendFactor),
2755 INIT(blend.rts[i], DestinationBlendFactor),
2756 INIT(blend.rts[i], ColorBlendFunction),
2757 INIT(blend.rts[i], SourceAlphaBlendFactor),
2758 INIT(blend.rts[i], DestinationAlphaBlendFactor),
2759 INIT(blend.rts[i], AlphaBlendFunction),
2760 };
2761
2762 GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
2763 dws += GENX(BLEND_STATE_ENTRY_length);
2764 }
2765
2766 /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
2767 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
2768 }
2769
2770 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) {
2771 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
2772 bsp.BlendStatePointer = hw_state->blend.state.offset;
2773 bsp.BlendStatePointerValid = true;
2774 }
2775 }
2776
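   /* Wa_18019816803: flush with a PSS stall sync whenever the corresponding
    * dirty bit was raised by a PS-related state change.
    */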
#if INTEL_WA_18019816803_GFX_VER
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
      genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                   cmd_buffer->state.current_pipeline,
                                   ANV_PIPE_PSS_STALL_SYNC_BIT);
   }
#endif

#if INTEL_WA_14018283232_GFX_VER
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232))
      genX(batch_emit_wa_14018283232)(&cmd_buffer->batch);
#endif

#if GFX_VER == 9
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
#endif

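   /* Tile-based rendering (TBIMR) pass setup: programs the tile rectangle
    * size, tile counts and batch size used when the render pass runs in
    * tiled mode (hw_state->use_tbimr).
    */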
#if GFX_VERx10 >= 125
   if (hw_state->use_tbimr &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO),
                     tbimr) {
         SET(tbimr, tbimr, TileRectangleHeight);
         SET(tbimr, tbimr, TileRectangleWidth);
         SET(tbimr, tbimr, VerticalTileCount);
         SET(tbimr, tbimr, HorizontalTileCount);
         SET(tbimr, tbimr, TBIMRBatchSize);
         SET(tbimr, tbimr, TileBoxCheck);
      }
   }
#endif

#undef INIT
#undef SET

   BITSET_ZERO(hw_state->dirty);
}

/**
 * This function handles possible state workarounds and emits the dirty
 * instructions to the batch buffer.
 */
void
genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_device *device = cmd_buffer->device;
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   struct anv_graphics_pipeline *pipeline =
      anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
   struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;

   if (INTEL_DEBUG(DEBUG_REEMIT)) {
      BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty,
                device->gfx_dirty_state);
   }

   /* Put potential workarounds here if you need to reemit an instruction
    * because another one changed.
    */

   /* Reproduce the programming done by the Windows driver. This fixes
    * flickering issues with multiple workloads.
    */
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
   }

   /* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */
#if INTEL_WA_16012775297_GFX_VER
   if (intel_needs_workaround(device->info, 16012775297) &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
#endif

   /* Since Wa_16011773973 disables 3DSTATE_STREAMOUT, we need to reemit it
    * afterwards.
    */
   if (intel_needs_workaround(device->info, 16011773973) &&
       pipeline->uses_xfb &&
       BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
   }

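   /* Wa_18038825448: track whether the fragment shader is actually running
    * at coarse pixel rate so that the workaround is only applied when
    * needed.
    */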
#if INTEL_WA_18038825448_GFX_VER
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data) {
      genX(cmd_buffer_set_coarse_pixel_active)(
         cmd_buffer,
         brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags));
   }
#endif

   /* Gfx11 undocumented issue:
    * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781
    */
#if GFX_VER == 11
   if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE))
      BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
#endif

   /* Wa_18020335297 - Apply the WA when the viewport pointer is
    * reprogrammed.
    */
   if (intel_needs_workaround(device->info, 18020335297) &&
       (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
        BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) &&
       cmd_buffer->state.gfx.viewport_set) {
      /* For mesh, we implement the WA using a CS stall. This keeps things
       * simple and takes care of possible interactions with Wa_16014390852.
       */
      if (anv_pipeline_is_mesh(pipeline)) {
         genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
                                      _3D, ANV_PIPE_CS_STALL_BIT);
      } else {
         /* Mask off all instructions that we program. */
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);

         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE);
         BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);

         cmd_buffer_gfx_state_emission(cmd_buffer);

         emit_wa_18020335297_dummy_draw(cmd_buffer);

         /* Dirty all the WA state emitted above to make sure the current
          * real state is restored.
          */
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);

         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
         BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
      }
   }

   cmd_buffer_gfx_state_emission(cmd_buffer);
}

void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   if (!anv_cmd_buffer_is_render_queue(cmd_buffer))
      return;

   if (cmd_buffer->state.gfx.pma_fix_enabled == enable)
      return;

   cmd_buffer->state.gfx.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI. If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall. However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                cmd_buffer->state.current_pipeline,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_CS_STALL_BIT |
#if GFX_VER >= 12
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
                                ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);

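   /* CACHE_MODE_0 is a masked register: the high 16 bits are per-bit write
    * enables, which is why anv_pack_struct() sets both the enable bit and
    * its corresponding Mask bit before the MI_LOAD_REGISTER_IMM.
    */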
#if GFX_VER == 9
   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_0_num);
      lri.DataDWord = cache_mode;
   }
#endif /* GFX_VER == 9 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary. We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
                                cmd_buffer->state.current_pipeline,
                                ANV_PIPE_DEPTH_STALL_BIT |
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
#if GFX_VER >= 12
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT |
#endif
                                ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
}