/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

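/* Toggle the hardware PMA fix.  Flipping the CACHE_MODE bit is heavyweight
 * (it requires full depth/render cache flushes and stalls around the LRI),
 * which is why the current value is cached in
 * cmd_buffer->state.pma_fix_enabled and the registers are only touched when
 * it actually changes.
 */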
void
genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
{
   if (cmd_buffer->state.pma_fix_enabled == enable)
      return;

   cmd_buffer->state.pma_fix_enabled = enable;

   /* According to the Broadwell PIPE_CONTROL documentation, software should
    * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
    * prior to the LRI.  If stencil buffer writes are enabled, then a Render
    * Cache Flush is also necessary.
    *
    * The Skylake docs say to use a depth stall rather than a command
    * streamer stall.  However, the hardware seems to violently disagree.
    * A full command streamer stall seems to be needed in both cases.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthCacheFlushEnable = true;
      pc.CommandStreamerStallEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;

      /* Wa_1409600907: "PIPE_CONTROL with Depth Stall Enable bit must
       * be set with any PIPE_CONTROL with Depth Flush Enable bit set."
       */
      pc.DepthStallEnable = true;
#endif
   }

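   /* CACHE_MODE_0/1 are masked registers: the upper 16 bits act as a per-bit
    * write enable, so setting the *Mask fields below makes the LRI update
    * only the PMA-related bits and leave the rest of the register alone.
    */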
#if GFX_VER == 9

   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
                   .STCPMAOptimizationEnable = enable,
                   .STCPMAOptimizationEnableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_0_num);
      lri.DataDWord      = cache_mode;
   }

#elif GFX_VER == 8

   uint32_t cache_mode;
   anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
                   .NPPMAFixEnable = enable,
                   .NPEarlyZFailsDisable = enable,
                   .NPPMAFixEnableMask = true,
                   .NPEarlyZFailsDisableMask = true);
   anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
      lri.RegisterOffset = GENX(CACHE_MODE_1_num);
      lri.DataDWord      = cache_mode;
   }

#endif /* GFX_VER == 8 */

   /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
    * Flush bits is often necessary.  We do it regardless because it's easier.
    * The render cache flush is also necessary if stencil writes are enabled.
    *
    * Again, the Skylake docs give a different set of flushes but the BDW
    * flushes seem to work just as well.
    */
   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
      pc.DepthStallEnable = true;
      pc.DepthCacheFlushEnable = true;
      pc.RenderTargetCacheFlushEnable = true;
#if GFX_VER >= 12
      pc.TileCacheFlushEnable = true;
#endif
   }
}

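/* Evaluate whether the Broadwell depth PMA fix should be enabled for the
 * current pipeline and dynamic depth/stencil state.  This walks the
 * CACHE_MODE_1::NP_PMA_FIX_ENABLE formula quoted in full below.
 */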
UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer,
                   const struct vk_depth_stencil_state *ds)
{
   assert(GFX_VER == 8);

   /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
    *
    *    SW must set this bit in order to enable this fix when following
    *    expression is TRUE.
    *
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *    (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    *    (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
    *    !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
    *    (3DSTATE_PS_EXTRA::PixelShaderValid) &&
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *    (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
    *    (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *       3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *       3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *       3DSTATE_PS_BLEND::AlphaTestEnable ||
    *       3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *      3DSTATE_WM::ForceKillPix != ForceOff &&
    *      ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *        3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *       (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *        3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *        3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *     (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!cmd_buffer->state.hiz_enabled)
      return false;

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
   if (!ds->depth.test_enable)
      return false;

   /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *    3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *    3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *    3DSTATE_PS_BLEND::AlphaTestEnable ||
    *    3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
    *   3DSTATE_WM::ForceKillPix != ForceOff &&
    *   ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
    *     3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
    *    (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *     3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
    *     3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
    *  (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */
   return (pipeline->kill_pixel && (ds->depth.write_enable ||
                                    ds->stencil.write_enable)) ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}

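/* Evaluate whether the Skylake stencil PMA (STC) optimization can be enabled
 * for the current pipeline and dynamic depth/stencil state, following the
 * STC_PMA_OPT formula quoted in full below.
 */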
UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
                     const struct vk_depth_stencil_state *ds)
{
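   /* The STC PMA optimization only exists on gfx9.  GFX_VER is a
    * compile-time constant, so on newer gens this early return folds away
    * the rest of the function.
    */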
   if (GFX_VER > 9)
      return false;
   assert(GFX_VER == 9);

   /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
    *
    *    Clearing this bit will force the STC cache to wait for pending
    *    retirement of pixels at the HZ-read stage and do the STC-test for
    *    Non-promoted, R-computed and Computed depth modes instead of
    *    postponing the STC-test to RCPFE.
    *
    *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    *
    *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    *
    *    COMP_STC_EN = STC_TEST_EN &&
    *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
    *
    *    SW parses the pipeline states to generate the following logical
    *    signal indicating if PMA FIX can be enabled.
    *
    *    STC_PMA_OPT =
    *       3DSTATE_WM::ForceThreadDispatch != 1 &&
    *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
    *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
    *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
    *       !(3DSTATE_WM::EDSC_Mode == 2) &&
    *       3DSTATE_PS_EXTRA::PixelShaderValid &&
    *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
    *       (COMP_STC_EN || STC_WRITE_EN) &&
    *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *         3DSTATE_WM::ForceKillPix == ON ||
    *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *         3DSTATE_PS_BLEND::AlphaTestEnable ||
    *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
    */

   /* These are always true:
    *    3DSTATE_WM::ForceThreadDispatch != 1 &&
    *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
    */

   /* We only enable the PMA fix if we know for certain that HiZ is enabled.
    * If we don't know whether HiZ is enabled or not, we disable the PMA fix
    * and there is no harm.
    *
    * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
    * 3DSTATE_DEPTH_BUFFER::HIZ Enable
    */
   if (!cmd_buffer->state.hiz_enabled)
      return false;

   /* We can't possibly know if HiZ is enabled without the depth attachment */
   ASSERTED const struct anv_image_view *d_iview =
      cmd_buffer->state.gfx.depth_att.iview;
   assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);

   /* 3DSTATE_PS_EXTRA::PixelShaderValid */
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
      return false;

   /* !(3DSTATE_WM::EDSC_Mode == 2) */
   const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
   if (wm_prog_data->early_fragment_tests)
      return false;

   /* We never use anv_pipeline for HiZ ops so this is trivially true:
    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
    *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
    *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
    *      3DSTATE_WM_HZ_OP::StencilBufferClear)
    */

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
    */
   const bool stc_test_en = ds->stencil.test_enable;

   /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
    * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
    *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
    */
   const bool stc_write_en = ds->stencil.write_enable;

   /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
   const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;

   /* COMP_STC_EN || STC_WRITE_EN */
   if (!(comp_stc_en || stc_write_en))
      return false;

   /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
    *  3DSTATE_WM::ForceKillPix == ON ||
    *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
    *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
    *  3DSTATE_PS_BLEND::AlphaTestEnable ||
    *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
    * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
    */
   return pipeline->kill_pixel ||
          wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
}

void
genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_graphics_pipeline *pipeline = cmd_buffer->state.gfx.pipeline;
   const struct vk_dynamic_graphics_state *dyn =
      &cmd_buffer->vk.dynamic_graphics_state;

#if GFX_VER >= 11
   if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
      genX(emit_shading_rate)(&cmd_buffer->batch, pipeline, &dyn->fsr);
#endif /* GFX_VER >= 11 */

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH)) {
      uint32_t sf_dw[GENX(3DSTATE_SF_length)];
      struct GENX(3DSTATE_SF) sf = {
         GENX(3DSTATE_SF_header),
      };
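      /* Cherryview exposes line width through its own CHVLineWidth field;
       * the other gfx8 platforms use the regular LineWidth.
       */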
#if GFX_VER == 8
      if (cmd_buffer->device->info.platform == INTEL_PLATFORM_CHV) {
         sf.CHVLineWidth = dyn->rs.line.width;
      } else {
         sf.LineWidth = dyn->rs.line.width;
      }
#else
      sf.LineWidth = dyn->rs.line.width;
#endif
      GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
      anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gfx8.sf);
   }

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
      /* Take dynamic primitive topology into account with
       *    3DSTATE_RASTER::APIMode
       *    3DSTATE_RASTER::DXMultisampleRasterizationEnable
       *    3DSTATE_RASTER::AntialiasingEnable
       */
      uint32_t api_mode = 0;
      bool msaa_raster_enable = false;

      VkPolygonMode dynamic_raster_mode =
         genX(raster_polygon_mode)(cmd_buffer->state.gfx.pipeline,
                                   dyn->ia.primitive_topology);

      genX(rasterization_mode)(dynamic_raster_mode,
                               pipeline->line_mode, dyn->rs.line.width,
                               &api_mode, &msaa_raster_enable);

      bool aa_enable = anv_rasterization_aa_mode(dynamic_raster_mode,
                                                 pipeline->line_mode);

      uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
      struct GENX(3DSTATE_RASTER) raster = {
         GENX(3DSTATE_RASTER_header),
         .APIMode = api_mode,
         .DXMultisampleRasterizationEnable = msaa_raster_enable,
         .AntialiasingEnable = aa_enable,
         .CullMode = genX(vk_to_intel_cullmode)[dyn->rs.cull_mode],
         .FrontWinding = genX(vk_to_intel_front_face)[dyn->rs.front_face],
         .GlobalDepthOffsetEnableSolid = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetEnableWireframe = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetEnablePoint = dyn->rs.depth_bias.enable,
         .GlobalDepthOffsetConstant = dyn->rs.depth_bias.constant,
         .GlobalDepthOffsetScale = dyn->rs.depth_bias.slope,
         .GlobalDepthOffsetClamp = dyn->rs.depth_bias.clamp,
      };
      GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
      anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
                           pipeline->gfx8.raster);
   }

   /* Stencil reference values moved from COLOR_CALC_STATE in gfx8 to
    * 3DSTATE_WM_DEPTH_STENCIL in gfx9.  That means the dirty bits get split
    * across different state packets for gfx8 and gfx9.  We handle that by
    * using a big old #if switch here.
    */
#if GFX_VER == 8
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = dyn->cb.blend_constants[0],
         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
         .StencilReferenceValue = dyn->ds.stencil.front.reference & 0xff,
         .BackfaceStencilReferenceValue = dyn->ds.stencil.back.reference & 0xff,
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK)) {
      VkImageAspectFlags ds_aspects = 0;
      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;

      struct vk_depth_stencil_state opt_ds = dyn->ds;
      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         ds.DoubleSidedStencilEnable = true;

         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;

         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;

         ds.DepthTestEnable = opt_ds.depth.test_enable;
         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
         ds.StencilTestEnable = opt_ds.stencil.test_enable;
         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
      }

      const bool pma = want_depth_pma_fix(cmd_buffer, &opt_ds);
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
   }
#else
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
      struct anv_state cc_state =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
                                            GENX(COLOR_CALC_STATE_length) * 4,
                                            64);
      struct GENX(COLOR_CALC_STATE) cc = {
         .BlendConstantColorRed = dyn->cb.blend_constants[0],
         .BlendConstantColorGreen = dyn->cb.blend_constants[1],
         .BlendConstantColorBlue = dyn->cb.blend_constants[2],
         .BlendConstantColorAlpha = dyn->cb.blend_constants[3],
      };
      GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
         ccp.ColorCalcStatePointer = cc_state.offset;
         ccp.ColorCalcStatePointerValid = true;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_RENDER_TARGETS)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
      VkImageAspectFlags ds_aspects = 0;
      if (cmd_buffer->state.gfx.depth_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
      if (cmd_buffer->state.gfx.stencil_att.vk_format != VK_FORMAT_UNDEFINED)
         ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;

      struct vk_depth_stencil_state opt_ds = dyn->ds;
      vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);

      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
         ds.DoubleSidedStencilEnable = true;

         ds.StencilTestMask = opt_ds.stencil.front.compare_mask & 0xff;
         ds.StencilWriteMask = opt_ds.stencil.front.write_mask & 0xff;

         ds.BackfaceStencilTestMask = opt_ds.stencil.back.compare_mask & 0xff;
         ds.BackfaceStencilWriteMask = opt_ds.stencil.back.write_mask & 0xff;

         ds.StencilReferenceValue = opt_ds.stencil.front.reference & 0xff;
         ds.BackfaceStencilReferenceValue = opt_ds.stencil.back.reference & 0xff;

         ds.DepthTestEnable = opt_ds.depth.test_enable;
         ds.DepthBufferWriteEnable = opt_ds.depth.write_enable;
         ds.DepthTestFunction = genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op];
         ds.StencilTestEnable = opt_ds.stencil.test_enable;
         ds.StencilBufferWriteEnable = opt_ds.stencil.write_enable;
         ds.StencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail];
         ds.StencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass];
         ds.StencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail];
         ds.StencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare];
         ds.BackfaceStencilFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail];
         ds.BackfaceStencilPassDepthPassOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass];
         ds.BackfaceStencilPassDepthFailOp = genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail];
         ds.BackfaceStencilTestFunction = genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare];
      }

      const bool pma = want_stencil_pma_fix(cmd_buffer, &opt_ds);
      genX(cmd_buffer_enable_pma_fix)(cmd_buffer, pma);
   }
#endif

#if GFX_VER >= 12
   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
         db.DepthBoundsTestEnable = dyn->ds.depth.bounds_test.enable;
         db.DepthBoundsTestMinValue = dyn->ds.depth.bounds_test.min;
         db.DepthBoundsTestMaxValue = dyn->ds.depth.bounds_test.max;
      }
   }
#endif

   if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
         ls.LineStipplePattern = dyn->rs.line.stipple.pattern;
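         /* The hardware consumes the repeat count both directly and as a
          * reciprocal; clamping the factor to at least 1 avoids a division
          * by zero if the application never set a stipple factor.
          */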
         ls.LineStippleInverseRepeatCount =
            1.0f / MAX2(1, dyn->rs.line.stipple.factor);
         ls.LineStippleRepeatCount = dyn->rs.line.stipple.factor;
      }
   }

   if ((cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
                                       ANV_CMD_DIRTY_INDEX_BUFFER)) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
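      /* The cut index is derived from the bound index type when the index
       * buffer is bound (e.g. 0xffff for 16-bit indices), which is why
       * 3DSTATE_VF is also re-emitted on index buffer changes.
       */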
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
#if GFX_VERx10 >= 125
         vf.GeometryDistributionEnable = true;
#endif
         vf.IndexedDrawCutIndexEnable = dyn->ia.primitive_restart_enable;
         vf.CutIndex = cmd_buffer->state.gfx.restart_index;
      }
   }

   if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_INDEX_BUFFER) {
      struct anv_buffer *buffer = cmd_buffer->state.gfx.index_buffer;
      uint32_t offset = cmd_buffer->state.gfx.index_offset;
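      /* With VK_WHOLE_SIZE, vk_buffer_range() resolves to the bytes remaining
       * past the bound offset, so BufferSize covers the rest of the buffer.
       */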
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
         ib.IndexFormat = cmd_buffer->state.gfx.index_type;
         ib.MOCS = anv_mocs(cmd_buffer->device,
                            buffer->address.bo,
                            ISL_SURF_USAGE_INDEX_BUFFER_BIT);
#if GFX_VER >= 12
         ib.L3BypassDisable = true;
#endif
         ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
         ib.BufferSize = vk_buffer_range(&buffer->vk, offset,
                                         VK_WHOLE_SIZE);
      }
   }

#if GFX_VERx10 >= 125
   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
         /* If 3DSTATE_TE: TE Enable == 1 then RR_STRICT else RR_FREE */
         vfg.DistributionMode =
            anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ? RR_STRICT :
                                                                      RR_FREE;
         vfg.DistributionGranularity = BatchLevelGranularity;
         /* Wa_14014890652 */
         if (intel_device_info_is_dg2(&cmd_buffer->device->info))
            vfg.GranularityThresholdDisable = 1;
         vfg.ListCutIndexEnable = dyn->ia.primitive_restart_enable;
         /* 192 vertices for TRILIST_ADJ */
         vfg.ListNBatchSizeScale = 0;
         /* Batch size of 384 vertices */
         vfg.List3BatchSizeScale = 2;
         /* Batch size of 128 vertices */
         vfg.List2BatchSizeScale = 1;
         /* Batch size of 128 vertices */
         vfg.List1BatchSizeScale = 2;
         /* Batch size of 256 vertices for STRIP topologies */
         vfg.StripBatchSizeScale = 3;
         /* 192 control points for PATCHLIST_3 */
         vfg.PatchBatchSizeScale = 1;
         /* 192 control points for PATCHLIST_3 */
         vfg.PatchBatchSizeMultiplier = 31;
      }
   }
#endif

   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS))
      genX(emit_sample_pattern)(&cmd_buffer->batch, dyn->ms.sample_locations);

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
      /* 3DSTATE_WM in the hope we can avoid spawning fragment shader
       * threads.
       */
      uint32_t wm_dwords[GENX(3DSTATE_WM_length)];
      struct GENX(3DSTATE_WM) wm = {
         GENX(3DSTATE_WM_header),

         .ForceThreadDispatchEnable = anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
                                      (pipeline->force_fragment_thread_dispatch ||
                                       anv_cmd_buffer_all_color_write_masked(cmd_buffer)) ?
                                      ForceON : 0,
      };
      GENX(3DSTATE_WM_pack)(NULL, wm_dwords, &wm);

      anv_batch_emit_merge(&cmd_buffer->batch, wm_dwords, pipeline->gfx8.wm);
   }

   if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
       BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
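      /* A render target counts as writeable only if a fragment shader is
       * bound and at least one currently-bound color attachment still has
       * its dynamic color write enable set.
       */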
      const uint8_t color_writes = dyn->cb.color_write_enables;
      const struct anv_cmd_graphics_state *state = &cmd_buffer->state.gfx;
      bool has_writeable_rt =
         anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
         (color_writes & ((1u << state->color_att_count) - 1)) != 0;

      /* Re-emit 3DSTATE_PS_BLEND so HasWriteableRT stays consistent with
       * the BLEND_STATE entries below.
       */
      uint32_t ps_blend_dwords[GENX(3DSTATE_PS_BLEND_length)];
      struct GENX(3DSTATE_PS_BLEND) ps_blend = {
         GENX(3DSTATE_PS_BLEND_header),
         .HasWriteableRT = has_writeable_rt,
      };
      GENX(3DSTATE_PS_BLEND_pack)(NULL, ps_blend_dwords, &ps_blend);
      anv_batch_emit_merge(&cmd_buffer->batch, ps_blend_dwords,
                           pipeline->gfx8.ps_blend);

      uint32_t blend_dws[GENX(BLEND_STATE_length) +
                         MAX_RTS * GENX(BLEND_STATE_ENTRY_length)];
      uint32_t *dws = blend_dws;
      memset(blend_dws, 0, sizeof(blend_dws));

      /* Leave the BLEND_STATE header dwords zeroed; the merge below ORs in
       * the pipeline's packed copy, so its static header fields pass
       * through unchanged.
       */
      dws += GENX(BLEND_STATE_length);

      for (uint32_t i = 0; i < MAX_RTS; i++) {
         /* Disable anything above the current number of color attachments. */
         bool write_disabled = i >= cmd_buffer->state.gfx.color_att_count ||
                               (color_writes & BITFIELD_BIT(i)) == 0;
         struct GENX(BLEND_STATE_ENTRY) entry = {
            .WriteDisableAlpha = write_disabled ||
                                 (pipeline->color_comp_writes[i] &
                                  VK_COLOR_COMPONENT_A_BIT) == 0,
            .WriteDisableRed = write_disabled ||
                               (pipeline->color_comp_writes[i] &
                                VK_COLOR_COMPONENT_R_BIT) == 0,
            .WriteDisableGreen = write_disabled ||
                                 (pipeline->color_comp_writes[i] &
                                  VK_COLOR_COMPONENT_G_BIT) == 0,
            .WriteDisableBlue = write_disabled ||
                                (pipeline->color_comp_writes[i] &
                                 VK_COLOR_COMPONENT_B_BIT) == 0,
            .LogicOpFunction = genX(vk_to_intel_logic_op)[dyn->cb.logic_op],
         };
         GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
         dws += GENX(BLEND_STATE_ENTRY_length);
      }

      uint32_t num_dwords = GENX(BLEND_STATE_length) +
                            GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;

      struct anv_state blend_states =
         anv_cmd_buffer_merge_dynamic(cmd_buffer, blend_dws,
                                      pipeline->gfx8.blend_state, num_dwords, 64);
      anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
         bsp.BlendStatePointer = blend_states.offset;
         bsp.BlendStatePointerValid = true;
      }
   }

   /* When we're done, there is no more dirty gfx state. */
   vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
   cmd_buffer->state.gfx.dirty = 0;
}