1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34 #include "common/intel_genX_state_brw.h"
35 #include "common/intel_guardband.h"
36 #include "common/intel_tiled_render.h"
37 #include "compiler/brw_prim.h"
38
39 static const uint32_t vk_to_intel_blend[] = {
40 [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
41 [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
42 [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
43 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
44 [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
45 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
46 [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
47 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
48 [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
49 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
50 [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
51 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
52 [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
53 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
54 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
55 [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
56 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
57 [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
58 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
59 };
60
61 static const uint32_t vk_to_intel_blend_op[] = {
62 [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
63 [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
64 [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
65 [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
66 [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
67 };
68
69 static const uint32_t vk_to_intel_cullmode[] = {
70 [VK_CULL_MODE_NONE] = CULLMODE_NONE,
71 [VK_CULL_MODE_FRONT_BIT] = CULLMODE_FRONT,
72 [VK_CULL_MODE_BACK_BIT] = CULLMODE_BACK,
73 [VK_CULL_MODE_FRONT_AND_BACK] = CULLMODE_BOTH
74 };
75
76 static const uint32_t vk_to_intel_fillmode[] = {
77 [VK_POLYGON_MODE_FILL] = FILL_MODE_SOLID,
78 [VK_POLYGON_MODE_LINE] = FILL_MODE_WIREFRAME,
79 [VK_POLYGON_MODE_POINT] = FILL_MODE_POINT,
80 };
81
82 static const uint32_t vk_to_intel_front_face[] = {
83 [VK_FRONT_FACE_COUNTER_CLOCKWISE] = 1,
84 [VK_FRONT_FACE_CLOCKWISE] = 0
85 };
86
87 static const uint32_t vk_to_intel_logic_op[] = {
88 [VK_LOGIC_OP_COPY] = LOGICOP_COPY,
89 [VK_LOGIC_OP_CLEAR] = LOGICOP_CLEAR,
90 [VK_LOGIC_OP_AND] = LOGICOP_AND,
91 [VK_LOGIC_OP_AND_REVERSE] = LOGICOP_AND_REVERSE,
92 [VK_LOGIC_OP_AND_INVERTED] = LOGICOP_AND_INVERTED,
93 [VK_LOGIC_OP_NO_OP] = LOGICOP_NOOP,
94 [VK_LOGIC_OP_XOR] = LOGICOP_XOR,
95 [VK_LOGIC_OP_OR] = LOGICOP_OR,
96 [VK_LOGIC_OP_NOR] = LOGICOP_NOR,
97 [VK_LOGIC_OP_EQUIVALENT] = LOGICOP_EQUIV,
98 [VK_LOGIC_OP_INVERT] = LOGICOP_INVERT,
99 [VK_LOGIC_OP_OR_REVERSE] = LOGICOP_OR_REVERSE,
100 [VK_LOGIC_OP_COPY_INVERTED] = LOGICOP_COPY_INVERTED,
101 [VK_LOGIC_OP_OR_INVERTED] = LOGICOP_OR_INVERTED,
102 [VK_LOGIC_OP_NAND] = LOGICOP_NAND,
103 [VK_LOGIC_OP_SET] = LOGICOP_SET,
104 };
105
106 static const uint32_t vk_to_intel_compare_op[] = {
107 [VK_COMPARE_OP_NEVER] = PREFILTEROP_NEVER,
108 [VK_COMPARE_OP_LESS] = PREFILTEROP_LESS,
109 [VK_COMPARE_OP_EQUAL] = PREFILTEROP_EQUAL,
110 [VK_COMPARE_OP_LESS_OR_EQUAL] = PREFILTEROP_LEQUAL,
111 [VK_COMPARE_OP_GREATER] = PREFILTEROP_GREATER,
112 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROP_NOTEQUAL,
113 [VK_COMPARE_OP_GREATER_OR_EQUAL] = PREFILTEROP_GEQUAL,
114 [VK_COMPARE_OP_ALWAYS] = PREFILTEROP_ALWAYS,
115 };
116
117 static const uint32_t vk_to_intel_stencil_op[] = {
118 [VK_STENCIL_OP_KEEP] = STENCILOP_KEEP,
119 [VK_STENCIL_OP_ZERO] = STENCILOP_ZERO,
120 [VK_STENCIL_OP_REPLACE] = STENCILOP_REPLACE,
121 [VK_STENCIL_OP_INCREMENT_AND_CLAMP] = STENCILOP_INCRSAT,
122 [VK_STENCIL_OP_DECREMENT_AND_CLAMP] = STENCILOP_DECRSAT,
123 [VK_STENCIL_OP_INVERT] = STENCILOP_INVERT,
124 [VK_STENCIL_OP_INCREMENT_AND_WRAP] = STENCILOP_INCR,
125 [VK_STENCIL_OP_DECREMENT_AND_WRAP] = STENCILOP_DECR,
126 };
127
128 static const uint32_t vk_to_intel_primitive_type[] = {
129 [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST,
130 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST,
131 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP,
132 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST,
133 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP,
134 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN,
135 [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = _3DPRIM_LINELIST_ADJ,
136 [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ,
137 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ,
138 [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
139 };
140
141 static void
genX(streamout_prologue)142 genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
143 {
144 #if INTEL_WA_16013994831_GFX_VER
145 /* Wa_16013994831 - Disable preemption during streamout, enable back
146 * again if XFB not used by the current pipeline.
147 *
148 * Although this workaround applies to Gfx12+, we already disable object
149 * level preemption for another reason in genX_state.c so we can skip this
150 * for Gfx12.
151 */
152 if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
153 return;
154
155 struct anv_graphics_pipeline *pipeline =
156 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
157 if (pipeline->uses_xfb) {
158 genX(cmd_buffer_set_preemption)(cmd_buffer, false);
159 return;
160 }
161
162 if (!cmd_buffer->state.gfx.object_preemption)
163 genX(cmd_buffer_set_preemption)(cmd_buffer, true);
164 #endif
165 }
166
167 #if GFX_VER >= 12 && GFX_VER < 30
168 static uint32_t
get_cps_state_offset(const struct anv_device * device,bool cps_enabled,const struct vk_fragment_shading_rate_state * fsr)169 get_cps_state_offset(const struct anv_device *device, bool cps_enabled,
170 const struct vk_fragment_shading_rate_state *fsr)
171 {
172 if (!cps_enabled)
173 return device->cps_states.offset;
174
175 uint32_t offset;
176 static const uint32_t size_index[] = {
177 [1] = 0,
178 [2] = 1,
179 [4] = 2,
180 };
181
182 #if GFX_VERx10 >= 125
183 offset =
184 1 + /* skip disabled */
185 fsr->combiner_ops[0] * 5 * 3 * 3 +
186 fsr->combiner_ops[1] * 3 * 3 +
187 size_index[fsr->fragment_size.width] * 3 +
188 size_index[fsr->fragment_size.height];
189 #else
190 offset =
191 1 + /* skip disabled */
192 size_index[fsr->fragment_size.width] * 3 +
193 size_index[fsr->fragment_size.height];
194 #endif
195
196 offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
197
198 return device->cps_states.offset + offset;
199 }
200 #endif /* GFX_VER >= 12 && GFX_VER < 30 */
201
202 #if GFX_VER >= 30
203 static uint32_t
get_cps_size(uint32_t size)204 get_cps_size(uint32_t size)
205 {
206 switch (size) {
207 case 1:
208 return CPSIZE_1;
209 case 2:
210 return CPSIZE_2;
211 case 4:
212 return CPSIZE_4;
213 default:
214 unreachable("Invalid size");
215 }
216 }
217
218 static const uint32_t vk_to_intel_shading_rate_combiner_op[] = {
219 [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR] = CPS_COMB_OP_PASSTHROUGH,
220 [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR] = CPS_COMB_OP_OVERRIDE,
221 [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR] = CPS_COMB_OP_HIGH_QUALITY,
222 [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR] = CPS_COMB_OP_LOW_QUALITY,
223 [VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MUL_KHR] = CPS_COMB_OP_RELATIVE,
224 };
225 #endif
226
227 static bool
has_ds_feedback_loop(const struct vk_dynamic_graphics_state * dyn)228 has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
229 {
230 return (dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
231 VK_IMAGE_ASPECT_STENCIL_BIT)) ||
232 dyn->ial.depth_att != MESA_VK_ATTACHMENT_UNUSED ||
233 dyn->ial.stencil_att != MESA_VK_ATTACHMENT_UNUSED;
234 }
235
236 UNUSED static bool
want_stencil_pma_fix(const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct vk_depth_stencil_state * ds)237 want_stencil_pma_fix(const struct vk_dynamic_graphics_state *dyn,
238 const struct anv_cmd_graphics_state *gfx,
239 const struct vk_depth_stencil_state *ds)
240 {
241 if (GFX_VER > 9)
242 return false;
243 assert(GFX_VER == 9);
244
245 /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
246 *
247 * Clearing this bit will force the STC cache to wait for pending
248 * retirement of pixels at the HZ-read stage and do the STC-test for
249 * Non-promoted, R-computed and Computed depth modes instead of
250 * postponing the STC-test to RCPFE.
251 *
252 * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
253 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
254 *
255 * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
256 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
257 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
258 *
259 * COMP_STC_EN = STC_TEST_EN &&
260 * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
261 *
262 * SW parses the pipeline states to generate the following logical
263 * signal indicating if PMA FIX can be enabled.
264 *
265 * STC_PMA_OPT =
266 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
267 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
268 * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
269 * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
270 * !(3DSTATE_WM::EDSC_Mode == 2) &&
271 * 3DSTATE_PS_EXTRA::PixelShaderValid &&
272 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
273 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
274 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
275 * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
276 * (COMP_STC_EN || STC_WRITE_EN) &&
277 * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
278 * 3DSTATE_WM::ForceKillPix == ON ||
279 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
280 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
281 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
282 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
283 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
284 */
285
286 /* These are always true:
287 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
288 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
289 */
290
291 /* We only enable the PMA fix if we know for certain that HiZ is enabled.
292 * If we don't know whether HiZ is enabled or not, we disable the PMA fix
293 * and there is no harm.
294 *
295 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
296 * 3DSTATE_DEPTH_BUFFER::HIZ Enable
297 */
298 if (!gfx->hiz_enabled)
299 return false;
300
301 /* We can't possibly know if HiZ is enabled without the depth attachment */
302 ASSERTED const struct anv_image_view *d_iview = gfx->depth_att.iview;
303 assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
304
305 /* 3DSTATE_PS_EXTRA::PixelShaderValid */
306 struct anv_graphics_pipeline *pipeline =
307 anv_pipeline_to_graphics(gfx->base.pipeline);
308 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
309 return false;
310
311 /* !(3DSTATE_WM::EDSC_Mode == 2) */
312 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
313 if (wm_prog_data->early_fragment_tests)
314 return false;
315
316 /* We never use anv_pipeline for HiZ ops so this is trivially true:
317 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
318 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
319 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
320 * 3DSTATE_WM_HZ_OP::StencilBufferClear)
321 */
322
323 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
324 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
325 */
326 const bool stc_test_en = ds->stencil.test_enable;
327
328 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
329 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
330 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
331 */
332 const bool stc_write_en = ds->stencil.write_enable;
333
334 /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
335 const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
336
337 /* COMP_STC_EN || STC_WRITE_EN */
338 if (!(comp_stc_en || stc_write_en))
339 return false;
340
341 /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
342 * 3DSTATE_WM::ForceKillPix == ON ||
343 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
344 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
345 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
346 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
347 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
348 */
349 return pipeline->kill_pixel ||
350 pipeline->rp_has_ds_self_dep ||
351 has_ds_feedback_loop(dyn) ||
352 wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
353 }
354
355 static inline bool
anv_rasterization_aa_mode(VkPolygonMode raster_mode,VkLineRasterizationModeKHR line_mode)356 anv_rasterization_aa_mode(VkPolygonMode raster_mode,
357 VkLineRasterizationModeKHR line_mode)
358 {
359 if (raster_mode == VK_POLYGON_MODE_LINE &&
360 line_mode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_KHR)
361 return true;
362 return false;
363 }
364
365 static inline VkLineRasterizationModeKHR
anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,unsigned rasterization_samples)366 anv_line_rasterization_mode(VkLineRasterizationModeKHR line_mode,
367 unsigned rasterization_samples)
368 {
369 if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_KHR) {
370 if (rasterization_samples > 1) {
371 return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_KHR;
372 } else {
373 return VK_LINE_RASTERIZATION_MODE_BRESENHAM_KHR;
374 }
375 }
376 return line_mode;
377 }
378
379 /** Returns the final polygon mode for rasterization
380 *
381 * This function takes into account polygon mode, primitive topology and the
382 * different shader stages which might generate their own type of primitives.
383 */
384 static inline VkPolygonMode
anv_raster_polygon_mode(const struct anv_graphics_pipeline * pipeline,VkPolygonMode polygon_mode,VkPrimitiveTopology primitive_topology)385 anv_raster_polygon_mode(const struct anv_graphics_pipeline *pipeline,
386 VkPolygonMode polygon_mode,
387 VkPrimitiveTopology primitive_topology)
388 {
389 if (anv_pipeline_is_mesh(pipeline)) {
390 switch (get_mesh_prog_data(pipeline)->primitive_type) {
391 case MESA_PRIM_POINTS:
392 return VK_POLYGON_MODE_POINT;
393 case MESA_PRIM_LINES:
394 return VK_POLYGON_MODE_LINE;
395 case MESA_PRIM_TRIANGLES:
396 return polygon_mode;
397 default:
398 unreachable("invalid primitive type for mesh");
399 }
400 } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_GEOMETRY)) {
401 switch (get_gs_prog_data(pipeline)->output_topology) {
402 case _3DPRIM_POINTLIST:
403 return VK_POLYGON_MODE_POINT;
404
405 case _3DPRIM_LINELIST:
406 case _3DPRIM_LINESTRIP:
407 case _3DPRIM_LINELOOP:
408 return VK_POLYGON_MODE_LINE;
409
410 case _3DPRIM_TRILIST:
411 case _3DPRIM_TRIFAN:
412 case _3DPRIM_TRISTRIP:
413 case _3DPRIM_RECTLIST:
414 case _3DPRIM_QUADLIST:
415 case _3DPRIM_QUADSTRIP:
416 case _3DPRIM_POLYGON:
417 return polygon_mode;
418 }
419 unreachable("Unsupported GS output topology");
420 } else if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
421 switch (get_tes_prog_data(pipeline)->output_topology) {
422 case INTEL_TESS_OUTPUT_TOPOLOGY_POINT:
423 return VK_POLYGON_MODE_POINT;
424
425 case INTEL_TESS_OUTPUT_TOPOLOGY_LINE:
426 return VK_POLYGON_MODE_LINE;
427
428 case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CW:
429 case INTEL_TESS_OUTPUT_TOPOLOGY_TRI_CCW:
430 return polygon_mode;
431 }
432 unreachable("Unsupported TCS output topology");
433 } else {
434 switch (primitive_topology) {
435 case VK_PRIMITIVE_TOPOLOGY_POINT_LIST:
436 return VK_POLYGON_MODE_POINT;
437
438 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST:
439 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP:
440 case VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY:
441 case VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY:
442 return VK_POLYGON_MODE_LINE;
443
444 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST:
445 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP:
446 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN:
447 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY:
448 case VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY:
449 return polygon_mode;
450
451 default:
452 unreachable("Unsupported primitive topology");
453 }
454 }
455 }
456
457 static inline bool
anv_is_dual_src_blend_factor(VkBlendFactor factor)458 anv_is_dual_src_blend_factor(VkBlendFactor factor)
459 {
460 return factor == VK_BLEND_FACTOR_SRC1_COLOR ||
461 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ||
462 factor == VK_BLEND_FACTOR_SRC1_ALPHA ||
463 factor == VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA;
464 }
465
466 static inline bool
anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state * cb)467 anv_is_dual_src_blend_equation(const struct vk_color_blend_attachment_state *cb)
468 {
469 return anv_is_dual_src_blend_factor(cb->src_color_blend_factor) &&
470 anv_is_dual_src_blend_factor(cb->dst_color_blend_factor) &&
471 anv_is_dual_src_blend_factor(cb->src_alpha_blend_factor) &&
472 anv_is_dual_src_blend_factor(cb->dst_alpha_blend_factor);
473 }
474
475 static void
anv_rasterization_mode(VkPolygonMode raster_mode,VkLineRasterizationModeKHR line_mode,float line_width,uint32_t * api_mode,bool * msaa_rasterization_enable)476 anv_rasterization_mode(VkPolygonMode raster_mode,
477 VkLineRasterizationModeKHR line_mode,
478 float line_width,
479 uint32_t *api_mode,
480 bool *msaa_rasterization_enable)
481 {
482 if (raster_mode == VK_POLYGON_MODE_LINE) {
483 /* Unfortunately, configuring our line rasterization hardware on gfx8
484 * and later is rather painful. Instead of giving us bits to tell the
485 * hardware what line mode to use like we had on gfx7, we now have an
486 * arcane combination of API Mode and MSAA enable bits which do things
487 * in a table which are expected to magically put the hardware into the
488 * right mode for your API. Sadly, Vulkan isn't any of the APIs the
489 * hardware people thought of so nothing works the way you want it to.
490 *
491 * Look at the table titled "Multisample Rasterization Modes" in Vol 7
492 * of the Skylake PRM for more details.
493 */
494 switch (line_mode) {
495 case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
496 *api_mode = DX101;
497 #if GFX_VER <= 9
498 /* Prior to ICL, the algorithm the HW uses to draw wide lines
499 * doesn't quite match what the CTS expects, at least for rectangular
500 * lines, so we set this to false here, making it draw parallelograms
501 * instead, which work well enough.
502 */
503 *msaa_rasterization_enable = line_width < 1.0078125;
504 #else
505 *msaa_rasterization_enable = true;
506 #endif
507 break;
508
509 case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
510 case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
511 *api_mode = DX9OGL;
512 *msaa_rasterization_enable = false;
513 break;
514
515 default:
516 unreachable("Unsupported line rasterization mode");
517 }
518 } else {
519 *api_mode = DX101;
520 *msaa_rasterization_enable = true;
521 }
522 }
523
524 static bool
525 is_src1_blend_factor(enum GENX(3D_Color_Buffer_Blend_Factor) factor)
526 {
527 return factor == BLENDFACTOR_SRC1_COLOR ||
528 factor == BLENDFACTOR_SRC1_ALPHA ||
529 factor == BLENDFACTOR_INV_SRC1_COLOR ||
530 factor == BLENDFACTOR_INV_SRC1_ALPHA;
531 }
532
533 #if GFX_VERx10 == 125
534 /**
535 * Return the dimensions of the current rendering area, defined as the
536 * bounding box of all present color, depth and stencil attachments.
537 */
538 UNUSED static bool
calculate_render_area(const struct anv_cmd_graphics_state * gfx,unsigned * width,unsigned * height)539 calculate_render_area(const struct anv_cmd_graphics_state *gfx,
540 unsigned *width, unsigned *height)
541 {
542 *width = gfx->render_area.offset.x + gfx->render_area.extent.width;
543 *height = gfx->render_area.offset.y + gfx->render_area.extent.height;
544
545 for (unsigned i = 0; i < gfx->color_att_count; i++) {
546 const struct anv_attachment *att = &gfx->color_att[i];
547 if (att->iview) {
548 *width = MAX2(*width, att->iview->vk.extent.width);
549 *height = MAX2(*height, att->iview->vk.extent.height);
550 }
551 }
552
553 const struct anv_image_view *const z_view = gfx->depth_att.iview;
554 if (z_view) {
555 *width = MAX2(*width, z_view->vk.extent.width);
556 *height = MAX2(*height, z_view->vk.extent.height);
557 }
558
559 const struct anv_image_view *const s_view = gfx->stencil_att.iview;
560 if (s_view) {
561 *width = MAX2(*width, s_view->vk.extent.width);
562 *height = MAX2(*height, s_view->vk.extent.height);
563 }
564
565 return *width && *height;
566 }
567
568 /* Calculate TBIMR tiling parameters adequate for the current pipeline
569 * setup. Return true if TBIMR should be enabled.
570 */
571 UNUSED static bool
calculate_tile_dimensions(const struct anv_device * device,const struct anv_cmd_graphics_state * gfx,const struct intel_l3_config * l3_config,unsigned fb_width,unsigned fb_height,unsigned * tile_width,unsigned * tile_height)572 calculate_tile_dimensions(const struct anv_device *device,
573 const struct anv_cmd_graphics_state *gfx,
574 const struct intel_l3_config *l3_config,
575 unsigned fb_width, unsigned fb_height,
576 unsigned *tile_width, unsigned *tile_height)
577 {
578 assert(GFX_VER == 12);
579 const unsigned aux_scale = ISL_MAIN_TO_CCS_SIZE_RATIO_XE;
580
581 unsigned pixel_size = 0;
582
583 /* Perform a rough calculation of the tile cache footprint of the
584 * pixel pipeline, approximating it as the sum of the amount of
585 * memory used per pixel by every render target, depth, stencil and
586 * auxiliary surfaces bound to the pipeline.
587 */
588 for (uint32_t i = 0; i < gfx->color_att_count; i++) {
589 const struct anv_attachment *att = &gfx->color_att[i];
590
591 if (att->iview) {
592 const struct anv_image *image = att->iview->image;
593 const unsigned p = anv_image_aspect_to_plane(image,
594 VK_IMAGE_ASPECT_COLOR_BIT);
595 const struct anv_image_plane *plane = &image->planes[p];
596
597 pixel_size += intel_calculate_surface_pixel_size(
598 &plane->primary_surface.isl);
599
600 if (isl_aux_usage_has_mcs(att->aux_usage))
601 pixel_size += intel_calculate_surface_pixel_size(
602 &plane->aux_surface.isl);
603
604 if (isl_aux_usage_has_ccs(att->aux_usage))
605 pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
606 &plane->primary_surface.isl),
607 aux_scale);
608 }
609 }
610
611 const struct anv_image_view *const z_view = gfx->depth_att.iview;
612 if (z_view) {
613 const struct anv_image *image = z_view->image;
614 assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
615 const unsigned p = anv_image_aspect_to_plane(image,
616 VK_IMAGE_ASPECT_DEPTH_BIT);
617 const struct anv_image_plane *plane = &image->planes[p];
618
619 pixel_size += intel_calculate_surface_pixel_size(
620 &plane->primary_surface.isl);
621
622 if (isl_aux_usage_has_hiz(image->planes[p].aux_usage))
623 pixel_size += intel_calculate_surface_pixel_size(
624 &plane->aux_surface.isl);
625
626 if (isl_aux_usage_has_ccs(image->planes[p].aux_usage))
627 pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
628 &plane->primary_surface.isl),
629 aux_scale);
630 }
631
632 const struct anv_image_view *const s_view = gfx->depth_att.iview;
633 if (s_view && s_view != z_view) {
634 const struct anv_image *image = s_view->image;
635 assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
636 const unsigned p = anv_image_aspect_to_plane(image,
637 VK_IMAGE_ASPECT_STENCIL_BIT);
638 const struct anv_image_plane *plane = &image->planes[p];
639
640 pixel_size += intel_calculate_surface_pixel_size(
641 &plane->primary_surface.isl);
642 }
643
644 if (!pixel_size)
645 return false;
646
647 /* Compute a tile layout that allows reasonable utilization of the
648 * tile cache based on the per-pixel cache footprint estimated
649 * above.
650 */
651 intel_calculate_tile_dimensions(device->info, l3_config,
652 32, 32, fb_width, fb_height,
653 pixel_size, tile_width, tile_height);
654
655 /* Perform TBIMR tile passes only if the framebuffer covers more
656 * than a single tile.
657 */
658 return *tile_width < fb_width || *tile_height < fb_height;
659 }
660 #endif
661
662 #define GET(field) hw_state->field
663 #define SET(bit, field, value) \
664 do { \
665 __typeof(hw_state->field) __v = value; \
666 if (hw_state->field != __v) { \
667 hw_state->field = __v; \
668 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
669 } \
670 } while (0)
671 #define SET_STAGE(bit, field, value, stage) \
672 do { \
673 __typeof(hw_state->field) __v = value; \
674 if (!anv_pipeline_has_stage(pipeline, \
675 MESA_SHADER_##stage)) { \
676 hw_state->field = __v; \
677 break; \
678 } \
679 if (hw_state->field != __v) { \
680 hw_state->field = __v; \
681 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
682 } \
683 } while (0)
684 #define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \
685 switch (mode) { \
686 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
687 SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
688 SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
689 SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \
690 break; \
691 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
692 SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2); \
693 SET(bit, cmd.LineStripListProvokingVertexSelect, 1); \
694 SET(bit, cmd.TriangleFanProvokingVertexSelect, 2); \
695 break; \
696 default: \
697 unreachable("Invalid provoking vertex mode"); \
698 } \
699
700 ALWAYS_INLINE static void
update_fs_msaa_flags(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)701 update_fs_msaa_flags(struct anv_gfx_dynamic_state *hw_state,
702 const struct vk_dynamic_graphics_state *dyn,
703 const struct anv_graphics_pipeline *pipeline)
704 {
705 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
706
707 if (!wm_prog_data)
708 return;
709
710 /* If we have any dynamic bits here, we might need to update the value
711 * in the push constant for the shader.
712 */
713 if (wm_prog_data->coarse_pixel_dispatch != INTEL_SOMETIMES &&
714 wm_prog_data->persample_dispatch != INTEL_SOMETIMES &&
715 wm_prog_data->alpha_to_coverage != INTEL_SOMETIMES)
716 return;
717
718 enum intel_msaa_flags fs_msaa_flags = INTEL_MSAA_FLAG_ENABLE_DYNAMIC;
719
720 if (dyn->ms.rasterization_samples > 1) {
721 fs_msaa_flags |= INTEL_MSAA_FLAG_MULTISAMPLE_FBO;
722
723 if (wm_prog_data->sample_shading) {
724 assert(wm_prog_data->persample_dispatch != INTEL_NEVER);
725 fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH;
726 }
727 if ((pipeline->sample_shading_enable &&
728 (pipeline->min_sample_shading * dyn->ms.rasterization_samples) > 1) ||
729 wm_prog_data->sample_shading) {
730 fs_msaa_flags |= INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH |
731 INTEL_MSAA_FLAG_PERSAMPLE_INTERP;
732 }
733 }
734
735 if (wm_prog_data->coarse_pixel_dispatch == INTEL_SOMETIMES &&
736 !(fs_msaa_flags & INTEL_MSAA_FLAG_PERSAMPLE_DISPATCH)) {
737 fs_msaa_flags |= INTEL_MSAA_FLAG_COARSE_PI_MSG |
738 INTEL_MSAA_FLAG_COARSE_RT_WRITES;
739 }
740
741 if (dyn->ms.alpha_to_coverage_enable)
742 fs_msaa_flags |= INTEL_MSAA_FLAG_ALPHA_TO_COVERAGE;
743
744 SET(FS_MSAA_FLAGS, fs_msaa_flags, fs_msaa_flags);
745 }
746
747 ALWAYS_INLINE static void
update_ps(struct anv_gfx_dynamic_state * hw_state,const struct anv_device * device,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)748 update_ps(struct anv_gfx_dynamic_state *hw_state,
749 const struct anv_device *device,
750 const struct vk_dynamic_graphics_state *dyn,
751 const struct anv_graphics_pipeline *pipeline)
752 {
753 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
754
755 if (!wm_prog_data) {
756 #if GFX_VER < 20
757 SET(PS, ps._8PixelDispatchEnable, false);
758 SET(PS, ps._16PixelDispatchEnable, false);
759 SET(PS, ps._32PixelDispatchEnable, false);
760 #else
761 SET(PS, ps.Kernel0Enable, false);
762 SET(PS, ps.Kernel1Enable, false);
763 #endif
764 return;
765 }
766
767 const struct anv_shader_bin *fs_bin =
768 pipeline->base.shaders[MESA_SHADER_FRAGMENT];
769 struct GENX(3DSTATE_PS) ps = {};
770 intel_set_ps_dispatch_state(&ps, device->info, wm_prog_data,
771 MAX2(dyn->ms.rasterization_samples, 1),
772 hw_state->fs_msaa_flags);
773
774 SET(PS, ps.KernelStartPointer0,
775 fs_bin->kernel.offset +
776 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 0));
777 SET(PS, ps.KernelStartPointer1,
778 fs_bin->kernel.offset +
779 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 1));
780 #if GFX_VER < 20
781 SET(PS, ps.KernelStartPointer2,
782 fs_bin->kernel.offset +
783 brw_wm_prog_data_prog_offset(wm_prog_data, ps, 2));
784 #endif
785
786 SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData0,
787 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 0));
788 SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData1,
789 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 1));
790 #if GFX_VER < 20
791 SET(PS, ps.DispatchGRFStartRegisterForConstantSetupData2,
792 brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, ps, 2));
793 #endif
794
795 #if GFX_VER < 20
796 SET(PS, ps._8PixelDispatchEnable, ps._8PixelDispatchEnable);
797 SET(PS, ps._16PixelDispatchEnable, ps._16PixelDispatchEnable);
798 SET(PS, ps._32PixelDispatchEnable, ps._32PixelDispatchEnable);
799 #else
800 SET(PS, ps.Kernel0Enable, ps.Kernel0Enable);
801 SET(PS, ps.Kernel1Enable, ps.Kernel1Enable);
802 SET(PS, ps.Kernel0SIMDWidth, ps.Kernel0SIMDWidth);
803 SET(PS, ps.Kernel1SIMDWidth, ps.Kernel1SIMDWidth);
804 SET(PS, ps.Kernel0PolyPackingPolicy, ps.Kernel0PolyPackingPolicy);
805 #endif
806
807 SET(PS, ps.PositionXYOffsetSelect,
808 !wm_prog_data->uses_pos_offset ? POSOFFSET_NONE :
809 brw_wm_prog_data_is_persample(wm_prog_data,
810 hw_state->fs_msaa_flags) ?
811 POSOFFSET_SAMPLE : POSOFFSET_CENTROID);
812 }
813
814 ALWAYS_INLINE static void
update_ps_extra_wm(struct anv_gfx_dynamic_state * hw_state,const struct anv_graphics_pipeline * pipeline)815 update_ps_extra_wm(struct anv_gfx_dynamic_state *hw_state,
816 const struct anv_graphics_pipeline *pipeline)
817 {
818 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
819
820 if (!wm_prog_data)
821 return;
822
823 SET(PS_EXTRA, ps_extra.PixelShaderIsPerSample,
824 brw_wm_prog_data_is_persample(wm_prog_data,
825 hw_state->fs_msaa_flags));
826 #if GFX_VER >= 11
827 const bool uses_coarse_pixel =
828 brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags);
829 SET(PS_EXTRA, ps_extra.PixelShaderIsPerCoarsePixel, uses_coarse_pixel);
830 #endif
831 #if GFX_VERx10 >= 125
832 /* TODO: We should only require this when the last geometry shader uses a
833 * fragment shading rate that is not constant.
834 */
835 SET(PS_EXTRA, ps_extra.EnablePSDependencyOnCPsizeChange, uses_coarse_pixel);
836 #endif
837
838 SET(WM, wm.BarycentricInterpolationMode,
839 wm_prog_data_barycentric_modes(wm_prog_data, hw_state->fs_msaa_flags));
840 }
841
842 ALWAYS_INLINE static void
update_ps_extra_has_uav(struct anv_gfx_dynamic_state * hw_state,const struct anv_cmd_graphics_state * gfx,const struct anv_graphics_pipeline * pipeline)843 update_ps_extra_has_uav(struct anv_gfx_dynamic_state *hw_state,
844 const struct anv_cmd_graphics_state *gfx,
845 const struct anv_graphics_pipeline *pipeline)
846 {
847 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
848
849 #if GFX_VERx10 >= 125
850 SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
851 wm_prog_data && wm_prog_data->has_side_effects,
852 FRAGMENT);
853 #else
854 /* Prior to Gfx12.5 the HW seems to avoid spawning fragment shaders even if
855 * 3DSTATE_PS_EXTRA::PixelShaderKillsPixel=true when
856 * 3DSTATE_PS_BLEND::HasWriteableRT=false. This is causing problems with
857 * occlusion queries with 0 attachments. There are no CTS tests exercising
858 * this but zink+anv fails a bunch of tests like piglit
859 * arb_framebuffer_no_attachments-query.
860 *
861 * Here we choose to tweak the PixelShaderHasUAV to make sure the fragment
862 * shaders are run properly.
863 */
864 SET_STAGE(PS_EXTRA, ps_extra.PixelShaderHasUAV,
865 wm_prog_data && (wm_prog_data->has_side_effects ||
866 (gfx->color_att_count == 0 &&
867 gfx->n_occlusion_queries > 0)),
868 FRAGMENT);
869 #endif
870 }
871
872 ALWAYS_INLINE static void
update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_graphics_pipeline * pipeline)873 update_ps_extra_kills_pixel(struct anv_gfx_dynamic_state *hw_state,
874 const struct vk_dynamic_graphics_state *dyn,
875 const struct anv_cmd_graphics_state *gfx,
876 const struct anv_graphics_pipeline *pipeline)
877 {
878 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
879
880 SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
881 wm_prog_data && (pipeline->rp_has_ds_self_dep ||
882 has_ds_feedback_loop(dyn) ||
883 wm_prog_data->uses_kill),
884 FRAGMENT);
885 }
886
887 #if GFX_VERx10 >= 125
888 ALWAYS_INLINE static void
update_vfg_list_cut_index(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)889 update_vfg_list_cut_index(struct anv_gfx_dynamic_state *hw_state,
890 const struct vk_dynamic_graphics_state *dyn)
891 {
892 SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
893 }
894 #endif
895
896 ALWAYS_INLINE static void
update_streamout(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_graphics_pipeline * pipeline)897 update_streamout(struct anv_gfx_dynamic_state *hw_state,
898 const struct vk_dynamic_graphics_state *dyn,
899 const struct anv_cmd_graphics_state *gfx,
900 const struct anv_graphics_pipeline *pipeline)
901 {
902 SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
903 SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
904
905 #if INTEL_NEEDS_WA_18022508906
906 /* Wa_18022508906 :
907 *
908 * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
909 *
910 * SOL_INT::Render_Enable =
911 * (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
912 * (
913 * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
914 * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
915 * !3DSTATE_STREAMOUT::API_Render_Disable &&
916 * (
917 * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
918 * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
919 * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
920 * 3DSTATE_PS_EXTRA::PS_Valid ||
921 * 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
922 * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
923 * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
924 * )
925 * )
926 *
927 * If SOL_INT::Render_Enable is false, the SO stage will not forward any
928 * topologies down the pipeline. Which is not what we want for occlusion
929 * queries.
930 *
931 * Here we force rendering to get SOL_INT::Render_Enable when occlusion
932 * queries are active.
933 */
934 SET(STREAMOUT, so.ForceRendering,
935 (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
936 Force_on : 0);
937 #endif
938 }
939
940 ALWAYS_INLINE static void
update_provoking_vertex(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)941 update_provoking_vertex(struct anv_gfx_dynamic_state *hw_state,
942 const struct vk_dynamic_graphics_state *dyn,
943 const struct anv_graphics_pipeline *pipeline)
944 {
945 SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
946 SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);
947
948 switch (dyn->rs.provoking_vertex) {
949 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
950 SET(STREAMOUT, so.ReorderMode, LEADING);
951 SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
952 break;
953
954 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
955 SET(STREAMOUT, so.ReorderMode, TRAILING);
956 SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
957 break;
958
959 default:
960 unreachable("Invalid provoking vertex mode");
961 }
962 }
963
964 ALWAYS_INLINE static void
update_topology(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)965 update_topology(struct anv_gfx_dynamic_state *hw_state,
966 const struct vk_dynamic_graphics_state *dyn,
967 const struct anv_graphics_pipeline *pipeline)
968 {
969 uint32_t topology =
970 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL) ?
971 _3DPRIM_PATCHLIST(dyn->ts.patch_control_points) :
972 vk_to_intel_primitive_type[dyn->ia.primitive_topology];
973
974 SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
975 }
976
977 #if GFX_VER >= 11
978 ALWAYS_INLINE static void
update_cps(struct anv_gfx_dynamic_state * hw_state,const struct anv_device * device,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)979 update_cps(struct anv_gfx_dynamic_state *hw_state,
980 const struct anv_device *device,
981 const struct vk_dynamic_graphics_state *dyn,
982 const struct anv_graphics_pipeline *pipeline)
983 {
984 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
985 if (!wm_prog_data)
986 return;
987
988 UNUSED const bool cps_enable =
989 brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags);
990
991 #if GFX_VER >= 30
992 SET(COARSE_PIXEL, coarse_pixel.CPSizeX,
993 get_cps_size(dyn->fsr.fragment_size.width));
994 SET(COARSE_PIXEL, coarse_pixel.CPSizeY,
995 get_cps_size(dyn->fsr.fragment_size.height));
996 SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner0Opcode,
997 vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[0]]);
998 SET(COARSE_PIXEL, coarse_pixel.CPSizeCombiner1Opcode,
999 vk_to_intel_shading_rate_combiner_op[dyn->fsr.combiner_ops[1]]);
1000 #elif GFX_VER >= 12
1001 SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
1002 get_cps_state_offset(device, cps_enable, &dyn->fsr));
1003 #else
1004 STATIC_ASSERT(GFX_VER == 11);
1005 SET(CPS, cps.CoarsePixelShadingMode,
1006 cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE);
1007 SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
1008 SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
1009 #endif
1010 }
1011 #endif
1012
1013 ALWAYS_INLINE static void
update_te(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_graphics_pipeline * pipeline)1014 update_te(struct anv_gfx_dynamic_state *hw_state,
1015 const struct vk_dynamic_graphics_state *dyn,
1016 const struct anv_graphics_pipeline *pipeline)
1017 {
1018 const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);
1019
1020 if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
1021 if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
1022 SET(TE, te.OutputTopology, tes_prog_data->output_topology);
1023 } else {
1024 /* When the origin is upper-left, we have to flip the winding order */
1025 if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
1026 SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
1027 } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
1028 SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
1029 } else {
1030 SET(TE, te.OutputTopology, tes_prog_data->output_topology);
1031 }
1032 }
1033 } else {
1034 SET(TE, te.OutputTopology, OUTPUT_POINT);
1035 }
1036 }
1037
1038 ALWAYS_INLINE static void
update_line_width(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1039 update_line_width(struct anv_gfx_dynamic_state *hw_state,
1040 const struct vk_dynamic_graphics_state *dyn)
1041 {
1042 SET(SF, sf.LineWidth, dyn->rs.line.width);
1043 }
1044
1045 ALWAYS_INLINE static void
update_sf_global_depth_bias(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1046 update_sf_global_depth_bias(struct anv_gfx_dynamic_state *hw_state,
1047 const struct vk_dynamic_graphics_state *dyn)
1048 {
1049 /**
1050 * From the Vulkan Spec:
1051 *
1052 * "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth bias
1053 * representation is a factor of constant r equal to 1."
1054 *
1055 * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
1056 *
1057 * "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
1058 *
1059 * Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
1060 *
1061 * Where r is the minimum representable value > 0 in the depth buffer
1062 * format, converted to float32 (note: If state bit Legacy Global Depth
1063 * Bias Enable is set, the r term will be forced to 1.0)"
1064 *
1065 * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
1066 * LegacyGlobalDepthBiasEnable.
1067 */
1068 SET(SF, sf.LegacyGlobalDepthBiasEnable,
1069 dyn->rs.depth_bias.representation ==
1070 VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
1071 }
1072
1073 ALWAYS_INLINE static void
update_clip_api_mode(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1074 update_clip_api_mode(struct anv_gfx_dynamic_state *hw_state,
1075 const struct vk_dynamic_graphics_state *dyn)
1076 {
1077 SET(CLIP, clip.APIMode,
1078 dyn->vp.depth_clip_negative_one_to_one ?
1079 APIMODE_OGL : APIMODE_D3D);
1080 }
1081
1082 ALWAYS_INLINE static void
update_clip_max_viewport(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1083 update_clip_max_viewport(struct anv_gfx_dynamic_state *hw_state,
1084 const struct vk_dynamic_graphics_state *dyn)
1085 {
1086 /* From the Vulkan 1.0.45 spec:
1087 *
1088 * "If the last active vertex processing stage shader entry point's
1089 * interface does not include a variable decorated with ViewportIndex,
1090 * then the first viewport is used."
1091 *
1092 * This could mean that we might need to set the MaximumVPIndex based on
1093 * the pipeline's last stage, but if the last shader doesn't write the
1094 * viewport index and the VUE header is used, the compiler will force the
1095 * value to 0 (which is what the spec requires above). Otherwise it seems
1096 * like the HW should be pulling 0 if the VUE header is not present.
1097 *
1098 * Avoiding a check on the pipeline seems to prevent additional emissions
1099 * of 3DSTATE_CLIP which appear to impact performance on Assassin's Creed
1100 * Valhalla..
1101 */
1102 SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
1103 dyn->vp.viewport_count - 1 : 0);
1104 }
1105
1106 ALWAYS_INLINE static void
update_clip_raster(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_graphics_pipeline * pipeline)1107 update_clip_raster(struct anv_gfx_dynamic_state *hw_state,
1108 const struct vk_dynamic_graphics_state *dyn,
1109 const struct anv_cmd_graphics_state *gfx,
1110 const struct anv_graphics_pipeline *pipeline)
1111 {
1112 /* Take dynamic primitive topology in to account with
1113 * 3DSTATE_RASTER::APIMode
1114 * 3DSTATE_RASTER::DXMultisampleRasterizationEnable
1115 * 3DSTATE_RASTER::AntialiasingEnable
1116 */
1117 uint32_t api_mode = 0;
1118 bool msaa_raster_enable = false;
1119
1120 const VkLineRasterizationModeKHR line_mode =
1121 anv_line_rasterization_mode(dyn->rs.line.mode,
1122 dyn->ms.rasterization_samples);
1123
1124 const VkPolygonMode dynamic_raster_mode =
1125 anv_raster_polygon_mode(pipeline,
1126 dyn->rs.polygon_mode,
1127 dyn->ia.primitive_topology);
1128
1129 anv_rasterization_mode(dynamic_raster_mode,
1130 line_mode, dyn->rs.line.width,
1131 &api_mode, &msaa_raster_enable);
1132
1133 /* From the Browadwell PRM, Volume 2, documentation for 3DSTATE_RASTER,
1134 * "Antialiasing Enable":
1135 *
1136 * "This field must be disabled if any of the render targets have integer
1137 * (UINT or SINT) surface format."
1138 *
1139 * Additionally internal documentation for Gfx12+ states:
1140 *
1141 * "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
1142 * FORCED_SAMPLE_COUNT > 1."
1143 */
1144 const bool aa_enable =
1145 anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
1146 !gfx->has_uint_rt &&
1147 !(GFX_VER >= 12 && gfx->samples > 1);
1148
1149 const bool depth_clip_enable =
1150 vk_rasterization_state_depth_clip_enable(&dyn->rs);
1151
1152 const bool xy_clip_test_enable =
1153 (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
1154
1155 SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);
1156
1157 SET(RASTER, raster.APIMode, api_mode);
1158 SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
1159 SET(RASTER, raster.AntialiasingEnable, aa_enable);
1160 SET(RASTER, raster.CullMode, vk_to_intel_cullmode[dyn->rs.cull_mode]);
1161 SET(RASTER, raster.FrontWinding, vk_to_intel_front_face[dyn->rs.front_face]);
1162 SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
1163 SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
1164 SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
1165 SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant_factor);
1166 SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope_factor);
1167 SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
1168 SET(RASTER, raster.FrontFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
1169 SET(RASTER, raster.BackFaceFillMode, vk_to_intel_fillmode[dyn->rs.polygon_mode]);
1170 SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
1171 SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
1172 SET(RASTER, raster.ConservativeRasterizationEnable,
1173 dyn->rs.conservative_mode !=
1174 VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
1175 }
1176
1177 ALWAYS_INLINE static void
update_multisample(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1178 update_multisample(struct anv_gfx_dynamic_state *hw_state,
1179 const struct vk_dynamic_graphics_state *dyn)
1180 {
1181 SET(MULTISAMPLE, ms.NumberofMultisamples,
1182 __builtin_ffs(MAX2(dyn->ms.rasterization_samples, 1)) - 1);
1183 }
1184
1185 ALWAYS_INLINE static void
update_sample_mask(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1186 update_sample_mask(struct anv_gfx_dynamic_state *hw_state,
1187 const struct vk_dynamic_graphics_state *dyn)
1188 {
1189 /* From the Vulkan 1.0 spec:
1190 * If pSampleMask is NULL, it is treated as if the mask has all bits
1191 * enabled, i.e. no coverage is removed from fragments.
1192 *
1193 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
1194 */
1195 SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
1196 }
1197
1198 ALWAYS_INLINE static void
update_wm_depth_stencil(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_device * device)1199 update_wm_depth_stencil(struct anv_gfx_dynamic_state *hw_state,
1200 const struct vk_dynamic_graphics_state *dyn,
1201 const struct anv_cmd_graphics_state *gfx,
1202 const struct anv_device *device)
1203 {
1204 VkImageAspectFlags ds_aspects = 0;
1205 if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED)
1206 ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
1207 if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED)
1208 ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
1209
1210 struct vk_depth_stencil_state opt_ds = dyn->ds;
1211 vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
1212
1213 SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
1214
1215 SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
1216 opt_ds.stencil.front.compare_mask & 0xff);
1217 SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
1218 opt_ds.stencil.front.write_mask & 0xff);
1219
1220 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
1221 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
1222
1223 SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
1224 opt_ds.stencil.front.reference & 0xff);
1225 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
1226 opt_ds.stencil.back.reference & 0xff);
1227
1228 SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
1229 SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
1230 SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
1231 vk_to_intel_compare_op[opt_ds.depth.compare_op]);
1232 SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
1233 SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable,
1234 opt_ds.stencil.write_enable);
1235 SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
1236 vk_to_intel_stencil_op[opt_ds.stencil.front.op.fail]);
1237 SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
1238 vk_to_intel_stencil_op[opt_ds.stencil.front.op.pass]);
1239 SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
1240 vk_to_intel_stencil_op[
1241 opt_ds.stencil.front.op.depth_fail]);
1242 SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
1243 vk_to_intel_compare_op[
1244 opt_ds.stencil.front.op.compare]);
1245 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
1246 vk_to_intel_stencil_op[
1247 opt_ds.stencil.back.op.fail]);
1248 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
1249 vk_to_intel_stencil_op[
1250 opt_ds.stencil.back.op.pass]);
1251 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
1252 vk_to_intel_stencil_op[
1253 opt_ds.stencil.back.op.depth_fail]);
1254 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
1255 vk_to_intel_compare_op[
1256 opt_ds.stencil.back.op.compare]);
1257
1258 #if GFX_VER == 9
1259 const bool pma = want_stencil_pma_fix(dyn, gfx, &opt_ds);
1260 SET(PMA_FIX, pma_fix, pma);
1261 #endif
1262
1263 #if INTEL_WA_18019816803_GFX_VER
1264 if (intel_needs_workaround(device->info, 18019816803)) {
1265 bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
1266 SET(WA_18019816803, ds_write_state, ds_write_state);
1267 }
1268 #endif
1269 }
1270
1271 ALWAYS_INLINE static void
update_depth_bounds(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1272 update_depth_bounds(struct anv_gfx_dynamic_state *hw_state,
1273 const struct vk_dynamic_graphics_state *dyn)
1274 {
1275 SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
1276 /* Only look at updating the bounds if testing is enabled */
1277 if (dyn->ds.depth.bounds_test.enable) {
1278 SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
1279 SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
1280 }
1281 }
1282
1283 ALWAYS_INLINE static void
update_line_stipple(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn)1284 update_line_stipple(struct anv_gfx_dynamic_state *hw_state,
1285 const struct vk_dynamic_graphics_state *dyn)
1286 {
1287 SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
1288 SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
1289 1.0f / MAX2(1, dyn->rs.line.stipple.factor));
1290 SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);
1291
1292 SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable);
1293 }
1294
1295 ALWAYS_INLINE static void
update_vf_restart(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx)1296 update_vf_restart(struct anv_gfx_dynamic_state *hw_state,
1297 const struct vk_dynamic_graphics_state *dyn,
1298 const struct anv_cmd_graphics_state *gfx)
1299 {
1300 SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
1301 SET(VF, vf.CutIndex, gfx->restart_index);
1302 }
1303
1304 ALWAYS_INLINE static void
update_blend_state(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,struct anv_cmd_graphics_state * gfx,const struct anv_device * device,bool has_fs_stage,bool has_fs_dual_src)1305 update_blend_state(struct anv_gfx_dynamic_state *hw_state,
1306 const struct vk_dynamic_graphics_state *dyn,
1307 struct anv_cmd_graphics_state *gfx,
1308 const struct anv_device *device,
1309 bool has_fs_stage,
1310 bool has_fs_dual_src)
1311 {
1312 const struct anv_instance *instance = device->physical->instance;
1313 const uint8_t color_writes = dyn->cb.color_write_enables;
1314 bool has_writeable_rt =
1315 has_fs_stage &&
1316 !anv_gfx_all_color_write_masked(gfx, dyn);
1317
1318 SET(BLEND_STATE, blend.AlphaToCoverageEnable,
1319 dyn->ms.alpha_to_coverage_enable);
1320 SET(BLEND_STATE, blend.AlphaToOneEnable,
1321 dyn->ms.alpha_to_one_enable);
1322 SET(BLEND_STATE, blend.ColorDitherEnable,
1323 gfx->rendering_flags &
1324 VK_RENDERING_ENABLE_LEGACY_DITHERING_BIT_EXT);
1325
1326 bool independent_alpha_blend = false;
1327 /* Wa_14018912822, check if we set these during RT setup. */
1328 bool color_blend_zero = false;
1329 bool alpha_blend_zero = false;
1330 uint32_t rt_0 = MESA_VK_ATTACHMENT_UNUSED;
1331 for (uint32_t rt = 0; rt < MAX_RTS; rt++) {
1332 if (gfx->color_output_mapping[rt] >= gfx->color_att_count)
1333 continue;
1334
1335 uint32_t att = gfx->color_output_mapping[rt];
1336 if (att == 0)
1337 rt_0 = att;
1338
1339 /* Disable anything above the current number of color attachments. */
1340 bool write_disabled = (color_writes & BITFIELD_BIT(att)) == 0;
1341
1342 SET(BLEND_STATE, blend.rts[rt].WriteDisableAlpha,
1343 write_disabled ||
1344 (dyn->cb.attachments[att].write_mask &
1345 VK_COLOR_COMPONENT_A_BIT) == 0);
1346 SET(BLEND_STATE, blend.rts[rt].WriteDisableRed,
1347 write_disabled ||
1348 (dyn->cb.attachments[att].write_mask &
1349 VK_COLOR_COMPONENT_R_BIT) == 0);
1350 SET(BLEND_STATE, blend.rts[rt].WriteDisableGreen,
1351 write_disabled ||
1352 (dyn->cb.attachments[att].write_mask &
1353 VK_COLOR_COMPONENT_G_BIT) == 0);
1354 SET(BLEND_STATE, blend.rts[rt].WriteDisableBlue,
1355 write_disabled ||
1356 (dyn->cb.attachments[att].write_mask &
1357 VK_COLOR_COMPONENT_B_BIT) == 0);
1358 /* Vulkan specification 1.2.168, VkLogicOp:
1359 *
1360 * "Logical operations are controlled by the logicOpEnable and logicOp
1361 * members of VkPipelineColorBlendStateCreateInfo. If logicOpEnable is
1362 * VK_TRUE, then a logical operation selected by logicOp is applied
1363 * between each color attachment and the fragment’s corresponding
1364 * output value, and blending of all attachments is treated as if it
1365 * were disabled."
1366 *
1367 * From the Broadwell PRM Volume 2d: Command Reference: Structures:
1368 * BLEND_STATE_ENTRY:
1369 *
1370 * "Enabling LogicOp and Color Buffer Blending at the same time is
1371 * UNDEFINED"
1372 */
1373 SET(BLEND_STATE, blend.rts[rt].LogicOpFunction,
1374 vk_to_intel_logic_op[dyn->cb.logic_op]);
1375 SET(BLEND_STATE, blend.rts[rt].LogicOpEnable, dyn->cb.logic_op_enable);
1376
1377 SET(BLEND_STATE, blend.rts[rt].ColorClampRange, COLORCLAMP_RTFORMAT);
1378 SET(BLEND_STATE, blend.rts[rt].PreBlendColorClampEnable, true);
1379 SET(BLEND_STATE, blend.rts[rt].PostBlendColorClampEnable, true);
1380
1381 /* Setup blend equation. */
1382 SET(BLEND_STATE, blend.rts[rt].ColorBlendFunction,
1383 vk_to_intel_blend_op[
1384 dyn->cb.attachments[att].color_blend_op]);
1385 SET(BLEND_STATE, blend.rts[rt].AlphaBlendFunction,
1386 vk_to_intel_blend_op[
1387 dyn->cb.attachments[att].alpha_blend_op]);
1388
1389 if (dyn->cb.attachments[att].src_color_blend_factor !=
1390 dyn->cb.attachments[att].src_alpha_blend_factor ||
1391 dyn->cb.attachments[att].dst_color_blend_factor !=
1392 dyn->cb.attachments[att].dst_alpha_blend_factor ||
1393 dyn->cb.attachments[att].color_blend_op !=
1394 dyn->cb.attachments[att].alpha_blend_op)
1395 independent_alpha_blend = true;
1396
1397 /* The Dual Source Blending documentation says:
1398 *
1399 * "If SRC1 is included in a src/dst blend factor and a DualSource RT
1400 * Write message is not used, results are UNDEFINED. (This reflects the
1401 * same restriction in DX APIs, where undefined results are produced if
1402 * “o1” is not written by a PS – there are no default values defined)."
1403 *
1404 * There is no way to gracefully fix this undefined situation so we just
1405 * disable the blending to prevent possible issues.
1406 */
1407 if (has_fs_stage && !has_fs_dual_src &&
1408 anv_is_dual_src_blend_equation(&dyn->cb.attachments[att])) {
1409 SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable, false);
1410 } else {
1411 SET(BLEND_STATE, blend.rts[rt].ColorBufferBlendEnable,
1412 !dyn->cb.logic_op_enable &&
1413 dyn->cb.attachments[att].blend_enable);
1414 }
1415
1416 /* Our hardware applies the blend factor prior to the blend function
1417 * regardless of what function is used. Technically, this means the
1418 * hardware can do MORE than GL or Vulkan specify. However, it also
1419 * means that, for MIN and MAX, we have to stomp the blend factor to ONE
1420 * to make it a no-op.
1421 */
1422 uint32_t SourceBlendFactor;
1423 uint32_t DestinationBlendFactor;
1424 uint32_t SourceAlphaBlendFactor;
1425 uint32_t DestinationAlphaBlendFactor;
1426 if (dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MIN ||
1427 dyn->cb.attachments[att].color_blend_op == VK_BLEND_OP_MAX) {
1428 SourceBlendFactor = BLENDFACTOR_ONE;
1429 DestinationBlendFactor = BLENDFACTOR_ONE;
1430 } else {
1431 SourceBlendFactor = vk_to_intel_blend[
1432 dyn->cb.attachments[att].src_color_blend_factor];
1433 DestinationBlendFactor = vk_to_intel_blend[
1434 dyn->cb.attachments[att].dst_color_blend_factor];
1435 }
1436
1437 if (dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MIN ||
1438 dyn->cb.attachments[att].alpha_blend_op == VK_BLEND_OP_MAX) {
1439 SourceAlphaBlendFactor = BLENDFACTOR_ONE;
1440 DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
1441 } else {
1442 SourceAlphaBlendFactor = vk_to_intel_blend[
1443 dyn->cb.attachments[att].src_alpha_blend_factor];
1444 DestinationAlphaBlendFactor = vk_to_intel_blend[
1445 dyn->cb.attachments[att].dst_alpha_blend_factor];
1446 }
1447
1448 /* Replace and Src1 value by 1.0 if dual source blending is not
1449 * enabled.
1450 */
1451 if (has_fs_stage && !has_fs_dual_src) {
1452 if (is_src1_blend_factor(SourceBlendFactor))
1453 SourceBlendFactor = BLENDFACTOR_ONE;
1454 if (is_src1_blend_factor(DestinationBlendFactor))
1455 DestinationBlendFactor = BLENDFACTOR_ONE;
1456 }
1457
1458 if (instance->intel_enable_wa_14018912822 &&
1459 intel_needs_workaround(device->info, 14018912822) &&
1460 dyn->ms.rasterization_samples > 1) {
1461 if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
1462 DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
1463 color_blend_zero = true;
1464 }
1465 if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
1466 DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
1467 alpha_blend_zero = true;
1468 }
1469 }
1470
1471 SET(BLEND_STATE, blend.rts[rt].SourceBlendFactor, SourceBlendFactor);
1472 SET(BLEND_STATE, blend.rts[rt].DestinationBlendFactor, DestinationBlendFactor);
1473 SET(BLEND_STATE, blend.rts[rt].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
1474 SET(BLEND_STATE, blend.rts[rt].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
1475 }
1476 gfx->color_blend_zero = color_blend_zero;
1477 gfx->alpha_blend_zero = alpha_blend_zero;
1478
1479 SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);
1480
1481 if (rt_0 == MESA_VK_ATTACHMENT_UNUSED)
1482 rt_0 = 0;
1483
1484 /* 3DSTATE_PS_BLEND to be consistent with the rest of the
1485 * BLEND_STATE_ENTRY.
1486 */
1487 SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
1488 SET(PS_BLEND, ps_blend.ColorBufferBlendEnable,
1489 GET(blend.rts[rt_0].ColorBufferBlendEnable));
1490 SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor,
1491 GET(blend.rts[rt_0].SourceAlphaBlendFactor));
1492 SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor,
1493 gfx->alpha_blend_zero ?
1494 BLENDFACTOR_CONST_ALPHA :
1495 GET(blend.rts[rt_0].DestinationAlphaBlendFactor));
1496 SET(PS_BLEND, ps_blend.SourceBlendFactor,
1497 GET(blend.rts[rt_0].SourceBlendFactor));
1498 SET(PS_BLEND, ps_blend.DestinationBlendFactor,
1499 gfx->color_blend_zero ?
1500 BLENDFACTOR_CONST_COLOR :
1501 GET(blend.rts[rt_0].DestinationBlendFactor));
1502 SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
1503 SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable,
1504 GET(blend.IndependentAlphaBlendEnable));
1505 SET(PS_BLEND, ps_blend.AlphaToCoverageEnable,
1506 dyn->ms.alpha_to_coverage_enable);
1507 }
1508
1509 ALWAYS_INLINE static void
update_blend_constants(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx)1510 update_blend_constants(struct anv_gfx_dynamic_state *hw_state,
1511 const struct vk_dynamic_graphics_state *dyn,
1512 const struct anv_cmd_graphics_state *gfx)
1513 {
1514 SET(CC_STATE, cc.BlendConstantColorRed,
1515 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]);
1516 SET(CC_STATE, cc.BlendConstantColorGreen,
1517 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]);
1518 SET(CC_STATE, cc.BlendConstantColorBlue,
1519 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]);
1520 SET(CC_STATE, cc.BlendConstantColorAlpha,
1521 gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]);
1522 }
1523
1524 ALWAYS_INLINE static void
update_viewports(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,const struct anv_device * device)1525 update_viewports(struct anv_gfx_dynamic_state *hw_state,
1526 const struct vk_dynamic_graphics_state *dyn,
1527 const struct anv_cmd_graphics_state *gfx,
1528 const struct anv_device *device)
1529 {
1530 const struct anv_instance *instance = device->physical->instance;
1531 const VkViewport *viewports = dyn->vp.viewports;
1532
1533 const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
1534
1535 for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
1536 const VkViewport *vp = &viewports[i];
1537
1538 /* The gfx7 state struct has just the matrix and guardband fields, the
1539 * gfx8 struct adds the min/max viewport fields. */
1540 struct GENX(SF_CLIP_VIEWPORT) sfv = {
1541 .ViewportMatrixElementm00 = vp->width / 2,
1542 .ViewportMatrixElementm11 = vp->height / 2,
1543 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
1544 .ViewportMatrixElementm30 = vp->x + vp->width / 2,
1545 .ViewportMatrixElementm31 = vp->y + vp->height / 2,
1546 .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
1547 (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
1548 .XMinClipGuardband = -1.0f,
1549 .XMaxClipGuardband = 1.0f,
1550 .YMinClipGuardband = -1.0f,
1551 .YMaxClipGuardband = 1.0f,
1552 .XMinViewPort = vp->x,
1553 .XMaxViewPort = vp->x + vp->width - 1,
1554 .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
1555 .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
1556 };
1557
1558 /* Fix depth test misrenderings by lowering translated depth range */
1559 if (instance->lower_depth_range_rate != 1.0f)
1560 sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
1561
1562 const uint32_t fb_size_max = 1 << 14;
1563 uint32_t x_min = 0, x_max = fb_size_max;
1564 uint32_t y_min = 0, y_max = fb_size_max;
1565
1566 /* If we have a valid renderArea, include that */
1567 if (gfx->render_area.extent.width > 0 &&
1568 gfx->render_area.extent.height > 0) {
1569 x_min = MAX2(x_min, gfx->render_area.offset.x);
1570 x_max = MIN2(x_max, gfx->render_area.offset.x +
1571 gfx->render_area.extent.width);
1572 y_min = MAX2(y_min, gfx->render_area.offset.y);
1573 y_max = MIN2(y_max, gfx->render_area.offset.y +
1574 gfx->render_area.extent.height);
1575 }
1576
1577 /* The client is required to have enough scissors for whatever it
1578 * sets as ViewportIndex but it's possible that they've got more
1579 * viewports set from a previous command. Also, from the Vulkan
1580 * 1.3.207:
1581 *
1582 * "The application must ensure (using scissor if necessary) that
1583 * all rendering is contained within the render area."
1584 *
1585 * If the client doesn't set a scissor, that basically means it
1586 * guarantees everything is in-bounds already. If we end up using a
1587 * guardband of [-1, 1] in that case, there shouldn't be much loss.
1588 * It's theoretically possible that they could do all their clipping
1589 * with clip planes but that'd be a bit odd.
1590 */
1591 if (i < dyn->vp.scissor_count) {
1592 const VkRect2D *scissor = &dyn->vp.scissors[i];
1593 x_min = MAX2(x_min, scissor->offset.x);
1594 x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
1595 y_min = MAX2(y_min, scissor->offset.y);
1596 y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
1597 }
1598
1599 /* Only bother calculating the guardband if our known render area is
1600 * less than the maximum size. Otherwise, it will calculate [-1, 1]
1601 * anyway but possibly with precision loss.
1602 */
1603 if (x_min > 0 || x_max < fb_size_max ||
1604 y_min > 0 || y_max < fb_size_max) {
1605 intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
1606 sfv.ViewportMatrixElementm00,
1607 sfv.ViewportMatrixElementm11,
1608 sfv.ViewportMatrixElementm30,
1609 sfv.ViewportMatrixElementm31,
1610 &sfv.XMinClipGuardband,
1611 &sfv.XMaxClipGuardband,
1612 &sfv.YMinClipGuardband,
1613 &sfv.YMaxClipGuardband);
1614 }
1615
1616 #define SET_VP(bit, state, field) \
1617 do { \
1618 if (hw_state->state.field != sfv.field) { \
1619 hw_state->state.field = sfv.field; \
1620 BITSET_SET(hw_state->dirty, \
1621 ANV_GFX_STATE_##bit); \
1622 } \
1623 } while (0)
1624 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
1625 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
1626 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
1627 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
1628 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
1629 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
1630 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
1631 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
1632 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
1633 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
1634 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
1635 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
1636 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
1637 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
1638 #undef SET_VP
1639
1640 const bool depth_range_unrestricted =
1641 device->vk.enabled_extensions.EXT_depth_range_unrestricted;
1642
1643 float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0;
1644 float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0;
1645
1646 float min_depth = dyn->rs.depth_clamp_enable ?
1647 MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit;
1648 float max_depth = dyn->rs.depth_clamp_enable ?
1649 MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit;
1650
1651 if (dyn->rs.depth_clamp_enable &&
1652 dyn->vp.depth_clamp_mode == VK_DEPTH_CLAMP_MODE_USER_DEFINED_RANGE_EXT) {
1653 min_depth = dyn->vp.depth_clamp_range.minDepthClamp;
1654 max_depth = dyn->vp.depth_clamp_range.maxDepthClamp;
1655 }
1656
1657 SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
1658 SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
1659 }
1660
1661 /* If the HW state is already considered dirty or the previous
1662 * programmed viewport count is smaller than what we need, update the
1663 * viewport count and ensure the HW state is dirty. Otherwise if the
1664 * number of viewport programmed previously was larger than what we need
1665 * now, no need to reemit we can just keep the old programmed values.
1666 */
1667 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
1668 hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
1669 hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
1670 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
1671 }
1672 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
1673 hw_state->vp_cc.count < dyn->vp.viewport_count) {
1674 hw_state->vp_cc.count = dyn->vp.viewport_count;
1675 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
1676 }
1677 }
1678
1679 ALWAYS_INLINE static void
update_scissors(struct anv_gfx_dynamic_state * hw_state,const struct vk_dynamic_graphics_state * dyn,const struct anv_cmd_graphics_state * gfx,VkCommandBufferLevel cmd_buffer_level)1680 update_scissors(struct anv_gfx_dynamic_state *hw_state,
1681 const struct vk_dynamic_graphics_state *dyn,
1682 const struct anv_cmd_graphics_state *gfx,
1683 VkCommandBufferLevel cmd_buffer_level)
1684 {
1685 const VkRect2D *scissors = dyn->vp.scissors;
1686 const VkViewport *viewports = dyn->vp.viewports;
1687
1688 for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
1689 const VkRect2D *s = &scissors[i];
1690 const VkViewport *vp = &viewports[i];
1691
1692 const int max = 0xffff;
1693
1694 uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
1695 uint32_t x_min = MAX2(s->offset.x, vp->x);
1696 int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
1697 MAX2(vp->y, vp->y + vp->height) - 1);
1698 int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
1699 vp->x + vp->width - 1);
1700
1701 y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
1702 x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
1703
1704 /* Do this math using int64_t so overflow gets clamped correctly. */
1705 if (cmd_buffer_level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1706 y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
1707 x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
1708 y_max = CLAMP((uint64_t) y_max, 0,
1709 gfx->render_area.offset.y +
1710 gfx->render_area.extent.height - 1);
1711 x_max = CLAMP((uint64_t) x_max, 0,
1712 gfx->render_area.offset.x +
1713 gfx->render_area.extent.width - 1);
1714 }
1715
1716 if (s->extent.width <= 0 || s->extent.height <= 0) {
1717 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
1718 * ymax < ymin for empty clips. In case clip x, y, width height are
1719 * all 0, the clamps below produce 0 for xmin, ymin, xmax, ymax,
1720 * which isn't what we want. Just special case empty clips and
1721 * produce a canonical empty clip.
1722 */
1723 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
1724 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
1725 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
1726 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
1727 } else {
1728 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
1729 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
1730 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
1731 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
1732 }
1733 }
1734
1735 /* If the HW state is already considered dirty or the previous programmed
1736 * viewport count is smaller than what we need, update the viewport count
1737 * and ensure the HW state is dirty. Otherwise if the number of viewport
1738 * programmed previously was larger than what we need now, no need to
1739 * reemit we can just keep the old programmed values.
1740 */
1741 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
1742 hw_state->scissor.count < dyn->vp.scissor_count) {
1743 hw_state->scissor.count = dyn->vp.scissor_count;
1744 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
1745 }
1746 }
1747
1748 #if GFX_VERx10 == 125
1749 ALWAYS_INLINE static void
update_tbimr_info(struct anv_gfx_dynamic_state * hw_state,const struct anv_device * device,const struct anv_cmd_graphics_state * gfx,const struct intel_l3_config * l3_config)1750 update_tbimr_info(struct anv_gfx_dynamic_state *hw_state,
1751 const struct anv_device *device,
1752 const struct anv_cmd_graphics_state *gfx,
1753 const struct intel_l3_config *l3_config)
1754 {
1755 unsigned fb_width, fb_height, tile_width, tile_height;
1756
1757 if (device->physical->instance->enable_tbimr &&
1758 calculate_render_area(gfx, &fb_width, &fb_height) &&
1759 calculate_tile_dimensions(device, gfx, l3_config,
1760 fb_width, fb_height,
1761 &tile_width, &tile_height)) {
1762 /* Use a batch size of 128 polygons per slice as recommended */
1763 /* by BSpec 68436 "TBIMR Programming". */
1764 const unsigned num_slices = device->info->num_slices;
1765 const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256;
1766
1767 SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height);
1768 SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width);
1769 SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount,
1770 DIV_ROUND_UP(fb_height, tile_height));
1771 SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount,
1772 DIV_ROUND_UP(fb_width, tile_width));
1773 SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize,
1774 util_logbase2(batch_size) - 5);
1775 SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true);
1776 SET(TBIMR_TILE_PASS_INFO, use_tbimr, true);
1777 } else {
1778 hw_state->use_tbimr = false;
1779 }
1780 }
1781 #endif
1782
1783 /**
1784 * This function takes the vulkan runtime values & dirty states and updates
1785 * the values in anv_gfx_dynamic_state, flagging HW instructions for
1786 * reemission if the values are changing.
1787 *
1788 * Nothing is emitted in the batch buffer.
1789 */
1790 static void
cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state * hw_state,const struct anv_device * device,const struct vk_dynamic_graphics_state * dyn,struct anv_cmd_graphics_state * gfx,const struct anv_graphics_pipeline * pipeline,VkCommandBufferLevel cmd_buffer_level)1791 cmd_buffer_flush_gfx_runtime_state(struct anv_gfx_dynamic_state *hw_state,
1792 const struct anv_device *device,
1793 const struct vk_dynamic_graphics_state *dyn,
1794 struct anv_cmd_graphics_state *gfx,
1795 const struct anv_graphics_pipeline *pipeline,
1796 VkCommandBufferLevel cmd_buffer_level)
1797 {
1798 UNUSED bool fs_msaa_changed = false;
1799 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1800 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
1801 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES) ||
1802 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))
1803 update_fs_msaa_flags(hw_state, dyn, pipeline);
1804
1805 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1806 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
1807 update_ps(hw_state, device, dyn, pipeline);
1808 update_ps_extra_wm(hw_state, pipeline);
1809 }
1810
1811 if (gfx->dirty &
1812 #if GFX_VERx10 >= 125
1813 ANV_CMD_DIRTY_PIPELINE
1814 #else
1815 (ANV_CMD_DIRTY_PIPELINE | ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)
1816 #endif
1817 )
1818 update_ps_extra_has_uav(hw_state, gfx, pipeline);
1819
1820 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1821 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE))
1822 update_ps_extra_kills_pixel(hw_state, dyn, gfx, pipeline);
1823
1824 if ((gfx->dirty & ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE) ||
1825 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
1826 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM))
1827 update_streamout(hw_state, dyn, gfx, pipeline);
1828
1829 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX))
1830 update_provoking_vertex(hw_state, dyn, pipeline);
1831
1832 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1833 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY))
1834 update_topology(hw_state, dyn, pipeline);
1835
1836 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1837 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
1838 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
1839 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
1840 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
1841
1842 #if GFX_VER >= 11
1843 if (device->vk.enabled_extensions.KHR_fragment_shading_rate &&
1844 ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1845 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR) ||
1846 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)))
1847 update_cps(hw_state, device, dyn, pipeline);
1848 #endif /* GFX_VER >= 11 */
1849
1850 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1851 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN))
1852 update_te(hw_state, dyn, pipeline);
1853
1854 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
1855 update_line_width(hw_state, dyn);
1856
1857 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS))
1858 update_sf_global_depth_bias(hw_state, dyn);
1859
1860 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
1861 update_clip_api_mode(hw_state, dyn);
1862
1863 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORT_COUNT))
1864 update_clip_max_viewport(hw_state, dyn);
1865
1866 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1867 (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1868 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
1869 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
1870 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
1871 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
1872 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
1873 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
1874 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
1875 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
1876 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
1877 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
1878 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE))
1879 update_clip_raster(hw_state, dyn, gfx, pipeline);
1880
1881 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_RASTERIZATION_SAMPLES))
1882 update_multisample(hw_state, dyn);
1883
1884 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK))
1885 update_sample_mask(hw_state, dyn);
1886
1887 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1888 #if GFX_VER == 9
1889 /* For the PMA fix */
1890 (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1891 #endif
1892 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
1893 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
1894 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
1895 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
1896 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
1897 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
1898 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
1899 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE))
1900 update_wm_depth_stencil(hw_state, dyn, gfx, device);
1901
1902 #if GFX_VER >= 12
1903 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
1904 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS))
1905 update_depth_bounds(hw_state, dyn);
1906 #endif
1907
1908 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
1909 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE))
1910 update_line_stipple(hw_state, dyn);
1911
1912 if ((gfx->dirty & ANV_CMD_DIRTY_RESTART_INDEX) ||
1913 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
1914 update_vf_restart(hw_state, dyn, gfx);
1915
1916 if (gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER)
1917 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
1918
1919 #if GFX_VERx10 >= 125
1920 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
1921 update_vfg_list_cut_index(hw_state, dyn);
1922 #endif
1923
1924 if (device->vk.enabled_extensions.EXT_sample_locations &&
1925 (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
1926 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
1927 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
1928
1929 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1930 (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
1931 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
1932 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
1933 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
1934 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
1935 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_COVERAGE_ENABLE) ||
1936 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
1937 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
1938 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
1939 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
1940 update_blend_state(hw_state, dyn, gfx, device,
1941 wm_prog_data != NULL,
1942 wm_prog_data != NULL ?
1943 wm_prog_data->dual_src_blend : false);
1944 }
1945
1946 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS))
1947 update_blend_constants(hw_state, dyn, gfx);
1948
1949 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1950 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
1951 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1952 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
1953 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE) ||
1954 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLAMP_RANGE))
1955 update_viewports(hw_state, dyn, gfx, device);
1956
1957 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1958 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1959 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS))
1960 update_scissors(hw_state, dyn, gfx, cmd_buffer_level);
1961
1962 #if GFX_VERx10 == 125
1963 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS))
1964 update_tbimr_info(hw_state, device, gfx, pipeline->base.base.l3_config);
1965 #endif
1966
1967 #if INTEL_WA_14018283232_GFX_VER
1968 if (intel_needs_workaround(device->info, 14018283232) &&
1969 ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1970 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE))) {
1971 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
1972 SET(WA_14018283232, wa_14018283232_toggle,
1973 dyn->ds.depth.bounds_test.enable &&
1974 wm_prog_data &&
1975 wm_prog_data->uses_kill);
1976 }
1977 #endif
1978
1979 /* If the pipeline uses a dynamic value of patch_control_points and either
1980 * the pipeline change or the dynamic value change, check the value and
1981 * reemit if needed.
1982 */
1983 if (pipeline->dynamic_patch_control_points &&
1984 ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
1985 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_PATCH_CONTROL_POINTS)))
1986 SET(TCS_INPUT_VERTICES, tcs_input_vertices, dyn->ts.patch_control_points);
1987 }
1988
1989 #undef GET
1990 #undef SET
1991 #undef SET_STAGE
1992 #undef SETUP_PROVOKING_VERTEX
1993
1994 /**
1995 * This function takes the vulkan runtime values & dirty states and updates
1996 * the values in anv_gfx_dynamic_state, flagging HW instructions for
1997 * reemission if the values are changing.
1998 *
1999 * Nothing is emitted in the batch buffer.
2000 */
2001 void
genX(cmd_buffer_flush_gfx_runtime_state)2002 genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
2003 {
2004 cmd_buffer_flush_gfx_runtime_state(
2005 &cmd_buffer->state.gfx.dyn_state,
2006 cmd_buffer->device,
2007 &cmd_buffer->vk.dynamic_graphics_state,
2008 &cmd_buffer->state.gfx,
2009 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline),
2010 cmd_buffer->vk.level);
2011
2012 vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
2013 }
2014
2015 static void
emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer * cmd_buffer)2016 emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
2017 {
2018 #if GFX_VERx10 >= 125
2019 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
2020 vfg.DistributionMode = RR_STRICT;
2021 }
2022 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
2023 vf.GeometryDistributionEnable = true;
2024 }
2025 #endif
2026
2027 #if GFX_VER >= 12
2028 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
2029 pr.ReplicaMask = 1;
2030 }
2031 #endif
2032
2033 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) {
2034 rr.CullMode = CULLMODE_NONE;
2035 rr.FrontFaceFillMode = FILL_MODE_SOLID;
2036 rr.BackFaceFillMode = FILL_MODE_SOLID;
2037 }
2038
2039 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
2040 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);
2041
2042 #if GFX_VER >= 11
2043 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero);
2044 #endif
2045
2046 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) {
2047 clip.ClipEnable = true;
2048 clip.ClipMode = CLIPMODE_REJECT_ALL;
2049 }
2050
2051 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero);
2052 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero);
2053 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero);
2054 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero);
2055 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero);
2056 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero);
2057
2058 uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2,
2059 GENX(3DSTATE_VERTEX_ELEMENTS));
2060 uint32_t *ve_pack_dest = &vertex_elements[1];
2061
2062 for (int i = 0; i < 2; i++) {
2063 struct GENX(VERTEX_ELEMENT_STATE) element = {
2064 .Valid = true,
2065 .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
2066 .Component0Control = VFCOMP_STORE_0,
2067 .Component1Control = VFCOMP_STORE_0,
2068 .Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
2069 .Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
2070 };
2071 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element);
2072 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
2073 }
2074
2075 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
2076 topo.PrimitiveTopologyType = _3DPRIM_TRILIST;
2077 }
2078
2079 /* Emit dummy draw per slice. */
2080 for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) {
2081 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
2082 prim.VertexCountPerInstance = 3;
2083 prim.PrimitiveTopologyType = _3DPRIM_TRILIST;
2084 prim.InstanceCount = 1;
2085 prim.VertexAccessType = SEQUENTIAL;
2086 }
2087 }
2088 }
2089
2090 #if INTEL_WA_14018283232_GFX_VER
2091 void
genX(batch_emit_wa_14018283232)2092 genX(batch_emit_wa_14018283232)(struct anv_batch *batch)
2093 {
2094 anv_batch_emit(batch, GENX(RESOURCE_BARRIER), barrier) {
2095 barrier.ResourceBarrierBody = (struct GENX(RESOURCE_BARRIER_BODY)) {
2096 .BarrierType = RESOURCE_BARRIER_TYPE_IMMEDIATE,
2097 .SignalStage = RESOURCE_BARRIER_STAGE_COLOR,
2098 .WaitStage = RESOURCE_BARRIER_STAGE_PIXEL,
2099 };
2100 }
2101 }
2102 #endif
2103
2104 /**
2105 * This function handles dirty state emission to the batch buffer.
2106 */
2107 static void
cmd_buffer_gfx_state_emission(struct anv_cmd_buffer * cmd_buffer)2108 cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
2109 {
2110 struct anv_device *device = cmd_buffer->device;
2111 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
2112 struct anv_graphics_pipeline *pipeline =
2113 anv_pipeline_to_graphics(gfx->base.pipeline);
2114 const struct vk_dynamic_graphics_state *dyn =
2115 &cmd_buffer->vk.dynamic_graphics_state;
2116 struct anv_push_constants *push_consts =
2117 &cmd_buffer->state.gfx.base.push_constants;
2118 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
2119 const bool protected = cmd_buffer->vk.pool->flags &
2120 VK_COMMAND_POOL_CREATE_PROTECTED_BIT;
2121
2122 #if INTEL_WA_16011107343_GFX_VER
2123 /* Will be emitted in front of every draw instead */
2124 if (intel_needs_workaround(device->info, 16011107343) &&
2125 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_CTRL))
2126 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
2127 #endif
2128
2129 #if INTEL_WA_22018402687_GFX_VER
2130 /* Will be emitted in front of every draw instead */
2131 if (intel_needs_workaround(device->info, 22018402687) &&
2132 anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
2133 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
2134 #endif
2135
2136 /*
2137 * Values provided by push constants
2138 */
2139
2140 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TCS_INPUT_VERTICES)) {
2141 push_consts->gfx.tcs_input_vertices = dyn->ts.patch_control_points;
2142 cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
2143 gfx->base.push_constants_data_dirty = true;
2144 }
2145
2146 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_FS_MSAA_FLAGS)) {
2147 push_consts->gfx.fs_msaa_flags = hw_state->fs_msaa_flags;
2148 cmd_buffer->state.push_constants_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
2149 gfx->base.push_constants_data_dirty = true;
2150 }
2151
2152 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
2153 genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);
2154
2155 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
2156
2157 memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
2158 sizeof(struct intel_urb_config));
2159 }
2160
2161 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
2162 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);
2163
2164 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
2165 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);
2166
2167 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
2168 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);
2169
2170 #if GFX_VER >= 11
2171 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
2172 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
2173 #endif
2174
2175 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS)) {
2176 anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2177 final.vs, protected);
2178 }
2179
2180 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS)) {
2181 anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2182 final.hs, protected);
2183 }
2184
2185 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS)) {
2186 anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2187 final.ds, protected);
2188 }
2189
2190 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS)) {
2191 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), vfs) {
2192 vfs.StatisticsEnable = true;
2193 }
2194 }
2195
2196 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
2197 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);
2198
2199 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
2200 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);
2201
2202 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
2203 /* Wa_16011773973:
2204 * If SOL is enabled and SO_DECL state has to be programmed,
2205 * 1. Send 3D State SOL state with SOL disabled
2206 * 2. Send SO_DECL NP state
2207 * 3. Send 3D State SOL with SOL Enabled
2208 */
2209 if (intel_needs_workaround(device->info, 16011773973) &&
2210 pipeline->uses_xfb)
2211 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);
2212
2213 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
2214 final.so_decl_list);
2215
2216 #if GFX_VER >= 11 && GFX_VER < 20
2217 /* ICL PRMs, Volume 2a - Command Reference: Instructions,
2218 * 3DSTATE_SO_DECL_LIST:
2219 *
2220 * "Workaround: This command must be followed by a PIPE_CONTROL with
2221 * CS Stall bit set."
2222 *
2223 * On DG2+ also known as Wa_1509820217.
2224 */
2225 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2226 cmd_buffer->state.current_pipeline,
2227 ANV_PIPE_CS_STALL_BIT);
2228 #endif
2229 }
2230
2231 if (device->vk.enabled_extensions.EXT_mesh_shader) {
2232 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL)) {
2233 anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2234 final.mesh_control, protected);
2235 }
2236
2237 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
2238 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);
2239
2240 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
2241 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);
2242
2243 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL)) {
2244 anv_batch_emit_pipeline_state_protected(&cmd_buffer->batch, pipeline,
2245 final.task_control, protected);
2246 }
2247
2248 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
2249 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);
2250
2251 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
2252 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);
2253
2254 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
2255 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);
2256
2257 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
2258 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
2259 } else {
2260 assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
2261 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
2262 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
2263 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
2264 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
2265 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
2266 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
2267 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
2268 }
2269
2270 #define INIT(category, name) \
2271 .name = hw_state->category.name
2272 #define SET(s, category, name) \
2273 s.name = hw_state->category.name
2274
2275 /* Now the potentially dynamic instructions */
2276
2277 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS)) {
2278 anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_PS),
2279 pipeline, partial.ps, ps, protected) {
2280 SET(ps, ps, KernelStartPointer0);
2281 SET(ps, ps, KernelStartPointer1);
2282 SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData0);
2283 SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData1);
2284
2285 #if GFX_VER < 20
2286 SET(ps, ps, KernelStartPointer2);
2287 SET(ps, ps, DispatchGRFStartRegisterForConstantSetupData2);
2288
2289 SET(ps, ps, _8PixelDispatchEnable);
2290 SET(ps, ps, _16PixelDispatchEnable);
2291 SET(ps, ps, _32PixelDispatchEnable);
2292 #else
2293 SET(ps, ps, Kernel0Enable);
2294 SET(ps, ps, Kernel1Enable);
2295 SET(ps, ps, Kernel0SIMDWidth);
2296 SET(ps, ps, Kernel1SIMDWidth);
2297 SET(ps, ps, Kernel0PolyPackingPolicy);
2298 #endif
2299 SET(ps, ps, PositionXYOffsetSelect);
2300 }
2301 }
2302
2303 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA) ||
2304 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE)) {
2305 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
2306 pipeline, partial.ps_extra, pse) {
2307 SET(pse, ps_extra, PixelShaderHasUAV);
2308 SET(pse, ps_extra, PixelShaderIsPerSample);
2309 #if GFX_VER >= 11
2310 SET(pse, ps_extra, PixelShaderIsPerCoarsePixel);
2311 #endif
2312 SET(pse, ps_extra, PixelShaderKillsPixel);
2313
2314 #if INTEL_WA_18038825448_GFX_VER
2315 /* Add a dependency if easier the shader needs it (because of runtime
2316 * change through pre-rasterization shader) or if we notice a change.
2317 */
2318 pse.EnablePSDependencyOnCPsizeChange =
2319 hw_state->ps_extra.EnablePSDependencyOnCPsizeChange ||
2320 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_STATE);
2321 #elif GFX_VERx10 >= 125
2322 SET(pse, ps_extra, EnablePSDependencyOnCPsizeChange);
2323 #endif
2324 }
2325 }
2326
2327 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
2328 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
2329 pipeline, partial.clip, clip) {
2330 SET(clip, clip, APIMode);
2331 SET(clip, clip, ViewportXYClipTestEnable);
2332 SET(clip, clip, TriangleStripListProvokingVertexSelect);
2333 SET(clip, clip, LineStripListProvokingVertexSelect);
2334 SET(clip, clip, TriangleFanProvokingVertexSelect);
2335 SET(clip, clip, MaximumVPIndex);
2336 }
2337 }
2338
2339 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
2340 genX(streamout_prologue)(cmd_buffer);
2341
2342 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
2343 pipeline, partial.so, so) {
2344 SET(so, so, RenderingDisable);
2345 SET(so, so, RenderStreamSelect);
2346 SET(so, so, ReorderMode);
2347 SET(so, so, ForceRendering);
2348 }
2349 }
2350
2351 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
2352 struct anv_state sf_clip_state =
2353 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
2354 hw_state->vp_sf_clip.count * 64, 64);
2355
2356 for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
2357 struct GENX(SF_CLIP_VIEWPORT) sfv = {
2358 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
2359 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
2360 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
2361 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
2362 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
2363 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
2364 INIT(vp_sf_clip.elem[i], XMinClipGuardband),
2365 INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
2366 INIT(vp_sf_clip.elem[i], YMinClipGuardband),
2367 INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
2368 INIT(vp_sf_clip.elem[i], XMinViewPort),
2369 INIT(vp_sf_clip.elem[i], XMaxViewPort),
2370 INIT(vp_sf_clip.elem[i], YMinViewPort),
2371 INIT(vp_sf_clip.elem[i], YMaxViewPort),
2372 };
2373 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
2374 }
2375
2376 anv_batch_emit(&cmd_buffer->batch,
2377 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
2378 clip.SFClipViewportPointer = sf_clip_state.offset;
2379 }
2380 }
2381
2382 /* Force CC_VIEWPORT reallocation on Gfx9 when reprogramming
2383 * 3DSTATE_VIEWPORT_STATE_POINTERS_CC :
2384 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/11647
2385 */
2386 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
2387 (GFX_VER == 9 &&
2388 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR))) {
2389 hw_state->vp_cc.state =
2390 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
2391 hw_state->vp_cc.count * 8, 32);
2392
2393 for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
2394 struct GENX(CC_VIEWPORT) cc_viewport = {
2395 INIT(vp_cc.elem[i], MinimumDepth),
2396 INIT(vp_cc.elem[i], MaximumDepth),
2397 };
2398 GENX(CC_VIEWPORT_pack)(NULL, hw_state->vp_cc.state.map + i * 8,
2399 &cc_viewport);
2400 }
2401
2402 /* Dirty the pointers to reemit 3DSTATE_VIEWPORT_STATE_POINTERS_CC below
2403 */
2404 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR);
2405 }
2406
2407 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) {
2408 anv_batch_emit(&cmd_buffer->batch,
2409 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
2410 cc.CCViewportPointer = hw_state->vp_cc.state.offset;
2411 }
2412 cmd_buffer->state.gfx.viewport_set = true;
2413 }
2414
2415 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
2416 /* Wa_1409725701:
2417 *
2418 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
2419 * stored as an array of up to 16 elements. The location of first
2420 * element of the array, as specified by Pointer to SCISSOR_RECT,
2421 * should be aligned to a 64-byte boundary.
2422 */
2423 struct anv_state scissor_state =
2424 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
2425 hw_state->scissor.count * 8, 64);
2426
2427 for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
2428 struct GENX(SCISSOR_RECT) scissor = {
2429 INIT(scissor.elem[i], ScissorRectangleYMin),
2430 INIT(scissor.elem[i], ScissorRectangleXMin),
2431 INIT(scissor.elem[i], ScissorRectangleYMax),
2432 INIT(scissor.elem[i], ScissorRectangleXMax),
2433 };
2434 GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
2435 }
2436
2437 anv_batch_emit(&cmd_buffer->batch,
2438 GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
2439 ssp.ScissorRectPointer = scissor_state.offset;
2440 }
2441 }
2442
2443 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
2444 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
2445 SET(vft, vft, PrimitiveTopologyType);
2446 }
2447 }
2448
2449 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
2450 genX(batch_emit_vertex_input)(&cmd_buffer->batch, device,
2451 pipeline, dyn->vi);
2452 }
2453
2454 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
2455 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
2456 pipeline, partial.te, te) {
2457 SET(te, te, OutputTopology);
2458 }
2459 }
2460
2461 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
2462 anv_batch_emit_merge_protected(&cmd_buffer->batch, GENX(3DSTATE_GS),
2463 pipeline, partial.gs, gs, protected) {
2464 SET(gs, gs, ReorderMode);
2465 }
2466 }
2467
2468 #if GFX_VER >= 30
2469 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_COARSE_PIXEL)) {
2470 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_COARSE_PIXEL), coarse_pixel) {
2471 coarse_pixel.DisableCPSPointers = true;
2472 SET(coarse_pixel, coarse_pixel, CPSizeX);
2473 SET(coarse_pixel, coarse_pixel, CPSizeY);
2474 SET(coarse_pixel, coarse_pixel, CPSizeCombiner0Opcode);
2475 SET(coarse_pixel, coarse_pixel, CPSizeCombiner1Opcode);
2476 }
2477 }
2478 #else
2479 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
2480 #if GFX_VER == 11
2481 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
2482 SET(cps, cps, CoarsePixelShadingMode);
2483 SET(cps, cps, MinCPSizeX);
2484 SET(cps, cps, MinCPSizeY);
2485 }
2486 #elif GFX_VER >= 12
2487 /* TODO: we can optimize this flush in the following cases:
2488 *
2489 * In the case where the last geometry shader emits a value that is
2490 * not constant, we can avoid this stall because we can synchronize
2491 * the pixel shader internally with
2492 * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
2493 *
2494 * If we know that the previous pipeline and the current one are
2495 * using the same fragment shading rate.
2496 */
2497 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
2498 #if GFX_VERx10 >= 125
2499 pc.PSSStallSyncEnable = true;
2500 #else
2501 pc.PSDSyncEnable = true;
2502 #endif
2503 }
2504
2505 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
2506 SET(cps, cps, CoarsePixelShadingStateArrayPointer);
2507 }
2508 #endif
2509 }
2510 #endif /* GFX_VER >= 30 */
2511
2512 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
2513 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
2514 pipeline, partial.sf, sf) {
2515 SET(sf, sf, LineWidth);
2516 SET(sf, sf, TriangleStripListProvokingVertexSelect);
2517 SET(sf, sf, LineStripListProvokingVertexSelect);
2518 SET(sf, sf, TriangleFanProvokingVertexSelect);
2519 SET(sf, sf, LegacyGlobalDepthBiasEnable);
2520 }
2521 }
2522
2523 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
2524 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), raster) {
2525 /* For details on 3DSTATE_RASTER multisample state, see the BSpec
2526 * table "Multisample Modes State".
2527 *
2528 * NOTE: 3DSTATE_RASTER::ForcedSampleCount affects the SKL PMA fix
2529 * computations. If we ever set this bit to a different value, they
2530 * will need to be updated accordingly.
2531 */
2532 raster.ForcedSampleCount = FSC_NUMRASTSAMPLES_0;
2533 raster.ForceMultisampling = false;
2534 raster.ScissorRectangleEnable = true;
2535
2536 SET(raster, raster, APIMode);
2537 SET(raster, raster, DXMultisampleRasterizationEnable);
2538 SET(raster, raster, AntialiasingEnable);
2539 SET(raster, raster, CullMode);
2540 SET(raster, raster, FrontWinding);
2541 SET(raster, raster, GlobalDepthOffsetEnableSolid);
2542 SET(raster, raster, GlobalDepthOffsetEnableWireframe);
2543 SET(raster, raster, GlobalDepthOffsetEnablePoint);
2544 SET(raster, raster, GlobalDepthOffsetConstant);
2545 SET(raster, raster, GlobalDepthOffsetScale);
2546 SET(raster, raster, GlobalDepthOffsetClamp);
2547 SET(raster, raster, FrontFaceFillMode);
2548 SET(raster, raster, BackFaceFillMode);
2549 SET(raster, raster, ViewportZFarClipTestEnable);
2550 SET(raster, raster, ViewportZNearClipTestEnable);
2551 SET(raster, raster, ConservativeRasterizationEnable);
2552 }
2553 }
2554
2555 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE)) {
2556 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_MULTISAMPLE), ms) {
2557 ms.PixelLocation = CENTER;
2558
2559 /* The PRM says that this bit is valid only for DX9:
2560 *
2561 * SW can choose to set this bit only for DX9 API. DX10/OGL API's
2562 * should not have any effect by setting or not setting this bit.
2563 */
2564 ms.PixelPositionOffsetEnable = false;
2565
2566 SET(ms, ms, NumberofMultisamples);
2567 }
2568 }
2569
2570 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
2571 hw_state->cc.state =
2572 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
2573 GENX(COLOR_CALC_STATE_length) * 4,
2574 64);
2575 struct GENX(COLOR_CALC_STATE) cc = {
2576 INIT(cc, BlendConstantColorRed),
2577 INIT(cc, BlendConstantColorGreen),
2578 INIT(cc, BlendConstantColorBlue),
2579 INIT(cc, BlendConstantColorAlpha),
2580 };
2581 GENX(COLOR_CALC_STATE_pack)(NULL, hw_state->cc.state.map, &cc);
2582
2583 /* Dirty the pointers to reemit 3DSTATE_CC_STATE_POINTERS below
2584 */
2585 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR);
2586 }
2587
2588 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE_PTR)) {
2589 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
2590 ccp.ColorCalcStatePointer = hw_state->cc.state.offset;
2591 ccp.ColorCalcStatePointerValid = true;
2592 }
2593 }
2594
2595 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
2596 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
2597 SET(sm, sm, SampleMask);
2598 }
2599 }
2600
2601 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
2602 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
2603 SET(ds, ds, DoubleSidedStencilEnable);
2604 SET(ds, ds, StencilTestMask);
2605 SET(ds, ds, StencilWriteMask);
2606 SET(ds, ds, BackfaceStencilTestMask);
2607 SET(ds, ds, BackfaceStencilWriteMask);
2608 SET(ds, ds, StencilReferenceValue);
2609 SET(ds, ds, BackfaceStencilReferenceValue);
2610 SET(ds, ds, DepthTestEnable);
2611 SET(ds, ds, DepthBufferWriteEnable);
2612 SET(ds, ds, DepthTestFunction);
2613 SET(ds, ds, StencilTestEnable);
2614 SET(ds, ds, StencilBufferWriteEnable);
2615 SET(ds, ds, StencilFailOp);
2616 SET(ds, ds, StencilPassDepthPassOp);
2617 SET(ds, ds, StencilPassDepthFailOp);
2618 SET(ds, ds, StencilTestFunction);
2619 SET(ds, ds, BackfaceStencilFailOp);
2620 SET(ds, ds, BackfaceStencilPassDepthPassOp);
2621 SET(ds, ds, BackfaceStencilPassDepthFailOp);
2622 SET(ds, ds, BackfaceStencilTestFunction);
2623 }
2624 }
2625
2626 #if GFX_VER >= 12
2627 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
2628 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
2629 SET(db, db, DepthBoundsTestEnable);
2630 SET(db, db, DepthBoundsTestMinValue);
2631 SET(db, db, DepthBoundsTestMaxValue);
2632 }
2633 }
2634 #endif
2635
2636 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
2637 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
2638 SET(ls, ls, LineStipplePattern);
2639 SET(ls, ls, LineStippleInverseRepeatCount);
2640 SET(ls, ls, LineStippleRepeatCount);
2641 }
2642 #if GFX_VER >= 11
2643 /* ICL PRMs, Volume 2a - Command Reference: Instructions,
2644 * 3DSTATE_LINE_STIPPLE:
2645 *
2646 * "Workaround: This command must be followed by a PIPE_CONTROL with
2647 * CS Stall bit set."
2648 */
2649 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2650 cmd_buffer->state.current_pipeline,
2651 ANV_PIPE_CS_STALL_BIT);
2652 #endif
2653 }
2654
2655 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
2656 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
2657 #if GFX_VERx10 >= 125
2658 vf.GeometryDistributionEnable = true;
2659 #endif
2660 SET(vf, vf, IndexedDrawCutIndexEnable);
2661 SET(vf, vf, CutIndex);
2662 }
2663 }
2664
2665 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
2666 struct anv_buffer *buffer = gfx->index_buffer;
2667 uint32_t offset = gfx->index_offset;
2668 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
2669 ib.IndexFormat = gfx->index_type;
2670 ib.MOCS = anv_mocs(device,
2671 buffer ? buffer->address.bo : NULL,
2672 ISL_SURF_USAGE_INDEX_BUFFER_BIT);
2673 #if GFX_VER >= 12
2674 ib.L3BypassDisable = true;
2675 #endif
2676 if (buffer) {
2677 ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
2678 ib.BufferSize = gfx->index_size;
2679 }
2680 }
2681 }
2682
2683 #if GFX_VERx10 >= 125
2684 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
2685 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
2686 pipeline, partial.vfg, vfg) {
2687 SET(vfg, vfg, ListCutIndexEnable);
2688 }
2689 }
2690 #endif
2691
2692 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
2693 genX(emit_sample_pattern)(&cmd_buffer->batch,
2694 dyn->ms.sample_locations_enable ?
2695 dyn->ms.sample_locations : NULL);
2696 }
2697
2698 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
2699 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
2700 pipeline, partial.wm, wm) {
2701 SET(wm, wm, LineStippleEnable);
2702 SET(wm, wm, BarycentricInterpolationMode);
2703 }
2704 }
2705
2706 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
2707 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
2708 SET(blend, ps_blend, HasWriteableRT);
2709 SET(blend, ps_blend, ColorBufferBlendEnable);
2710 SET(blend, ps_blend, SourceAlphaBlendFactor);
2711 SET(blend, ps_blend, DestinationAlphaBlendFactor);
2712 SET(blend, ps_blend, SourceBlendFactor);
2713 SET(blend, ps_blend, DestinationBlendFactor);
2714 SET(blend, ps_blend, AlphaTestEnable);
2715 SET(blend, ps_blend, IndependentAlphaBlendEnable);
2716 SET(blend, ps_blend, AlphaToCoverageEnable);
2717 }
2718 }
2719
2720 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
2721 const uint32_t num_dwords = GENX(BLEND_STATE_length) +
2722 GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
2723 hw_state->blend.state =
2724 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
2725 num_dwords * 4,
2726 64);
2727
2728 uint32_t *dws = hw_state->blend.state.map;
2729
2730 struct GENX(BLEND_STATE) blend_state = {
2731 INIT(blend, AlphaToCoverageEnable),
2732 INIT(blend, AlphaToOneEnable),
2733 INIT(blend, IndependentAlphaBlendEnable),
2734 INIT(blend, ColorDitherEnable),
2735 };
2736 GENX(BLEND_STATE_pack)(NULL, dws, &blend_state);
2737
2738 /* Jump to blend entries. */
2739 dws += GENX(BLEND_STATE_length);
2740 for (uint32_t i = 0; i < MAX_RTS; i++) {
2741 struct GENX(BLEND_STATE_ENTRY) entry = {
2742 INIT(blend.rts[i], WriteDisableAlpha),
2743 INIT(blend.rts[i], WriteDisableRed),
2744 INIT(blend.rts[i], WriteDisableGreen),
2745 INIT(blend.rts[i], WriteDisableBlue),
2746 INIT(blend.rts[i], LogicOpFunction),
2747 INIT(blend.rts[i], LogicOpEnable),
2748 INIT(blend.rts[i], ColorBufferBlendEnable),
2749 INIT(blend.rts[i], ColorClampRange),
2750 INIT(blend.rts[i], PreBlendColorClampEnable),
2751 INIT(blend.rts[i], PostBlendColorClampEnable),
2752 INIT(blend.rts[i], SourceBlendFactor),
2753 INIT(blend.rts[i], DestinationBlendFactor),
2754 INIT(blend.rts[i], ColorBlendFunction),
2755 INIT(blend.rts[i], SourceAlphaBlendFactor),
2756 INIT(blend.rts[i], DestinationAlphaBlendFactor),
2757 INIT(blend.rts[i], AlphaBlendFunction),
2758 };
2759
2760 GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
2761 dws += GENX(BLEND_STATE_ENTRY_length);
2762 }
2763
2764 /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
2765 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR);
2766 }
2767
2768 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_PTR)) {
2769 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
2770 bsp.BlendStatePointer = hw_state->blend.state.offset;
2771 bsp.BlendStatePointerValid = true;
2772 }
2773 }
2774
2775 #if INTEL_WA_18019816803_GFX_VER
2776 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
2777 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2778 cmd_buffer->state.current_pipeline,
2779 ANV_PIPE_PSS_STALL_SYNC_BIT);
2780 }
2781 #endif
2782
2783 #if INTEL_WA_14018283232_GFX_VER
2784 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_14018283232))
2785 genX(batch_emit_wa_14018283232)(&cmd_buffer->batch);
2786 #endif
2787
2788 #if GFX_VER == 9
2789 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
2790 genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
2791 #endif
2792
2793 #if GFX_VERx10 >= 125
2794 if (hw_state->use_tbimr &&
2795 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) {
2796 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO),
2797 tbimr) {
2798 SET(tbimr, tbimr, TileRectangleHeight);
2799 SET(tbimr, tbimr, TileRectangleWidth);
2800 SET(tbimr, tbimr, VerticalTileCount);
2801 SET(tbimr, tbimr, HorizontalTileCount);
2802 SET(tbimr, tbimr, TBIMRBatchSize);
2803 SET(tbimr, tbimr, TileBoxCheck);
2804 }
2805 }
2806 #endif
2807
2808 #undef INIT
2809 #undef SET
2810
2811 BITSET_ZERO(hw_state->dirty);
2812 }
2813
2814 /**
2815 * This function handles possible state workarounds and emits the dirty
2816 * instructions to the batch buffer.
2817 */
2818 void
genX(cmd_buffer_flush_gfx_hw_state)2819 genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
2820 {
2821 struct anv_device *device = cmd_buffer->device;
2822 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
2823 struct anv_graphics_pipeline *pipeline =
2824 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
2825 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
2826
2827 if (INTEL_DEBUG(DEBUG_REEMIT)) {
2828 BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty,
2829 device->gfx_dirty_state);
2830 }
2831
2832 /**
2833 * Put potential workarounds here if you need to reemit an instruction
2834 * because of another one is changing.
2835 */
2836
2837 /* Wa_16012775297 - Emit dummy VF statistics before each 3DSTATE_VF. */
2838 #if INTEL_WA_16012775297_GFX_VER
2839 if (intel_needs_workaround(device->info, 16012775297) &&
2840 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF))
2841 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
2842 #endif
2843
2844 /* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
2845 * it after.
2846 */
2847 if (intel_needs_workaround(device->info, 16011773973) &&
2848 pipeline->uses_xfb &&
2849 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
2850 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2851 }
2852
2853 #if INTEL_WA_18038825448_GFX_VER
2854 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
2855 if (wm_prog_data) {
2856 genX(cmd_buffer_set_coarse_pixel_active)(
2857 cmd_buffer,
2858 brw_wm_prog_data_is_coarse(wm_prog_data, hw_state->fs_msaa_flags));
2859 }
2860 #endif
2861
2862 /* Gfx11 undocumented issue :
2863 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781
2864 */
2865 #if GFX_VER == 11
2866 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE))
2867 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
2868 #endif
2869
2870 /* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */
2871 if (intel_needs_workaround(device->info, 18020335297) &&
2872 (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
2873 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC_PTR)) &&
2874 cmd_buffer->state.gfx.viewport_set) {
2875 /* For mesh, we implement the WA using CS stall. This is for
2876 * simplicity and takes care of possible interaction with Wa_16014390852.
2877 */
2878 if (anv_pipeline_is_mesh(pipeline)) {
2879 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2880 _3D, ANV_PIPE_CS_STALL_BIT);
2881 } else {
2882 /* Mask off all instructions that we program. */
2883 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG);
2884 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF);
2885 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
2886 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER);
2887 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
2888 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
2889 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
2890 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP);
2891 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2892 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
2893 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
2894
2895 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS);
2896 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS);
2897 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
2898 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE);
2899 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
2900
2901 cmd_buffer_gfx_state_emission(cmd_buffer);
2902
2903 emit_wa_18020335297_dummy_draw(cmd_buffer);
2904
2905 /* Dirty all emitted WA state to make sure that current real
2906 * state is restored.
2907 */
2908 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG);
2909 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
2910 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
2911 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
2912 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
2913 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
2914 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
2915 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
2916 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2917 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
2918 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
2919
2920 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
2921 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
2922 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
2923 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
2924 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
2925 }
2926 }
2927
2928 cmd_buffer_gfx_state_emission(cmd_buffer);
2929 }
2930
2931 void
genX(cmd_buffer_enable_pma_fix)2932 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
2933 {
2934 if (!anv_cmd_buffer_is_render_queue(cmd_buffer))
2935 return;
2936
2937 if (cmd_buffer->state.gfx.pma_fix_enabled == enable)
2938 return;
2939
2940 cmd_buffer->state.gfx.pma_fix_enabled = enable;
2941
2942 /* According to the Broadwell PIPE_CONTROL documentation, software should
2943 * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
2944 * prior to the LRI. If stencil buffer writes are enabled, then a Render
2945 * Cache Flush is also necessary.
2946 *
2947 * The Skylake docs say to use a depth stall rather than a command
2948 * streamer stall. However, the hardware seems to violently disagree.
2949 * A full command streamer stall seems to be needed in both cases.
2950 */
2951 genx_batch_emit_pipe_control
2952 (&cmd_buffer->batch, cmd_buffer->device->info,
2953 cmd_buffer->state.current_pipeline,
2954 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2955 ANV_PIPE_CS_STALL_BIT |
2956 #if GFX_VER >= 12
2957 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
2958 #endif
2959 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
2960
2961 #if GFX_VER == 9
2962 uint32_t cache_mode;
2963 anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
2964 .STCPMAOptimizationEnable = enable,
2965 .STCPMAOptimizationEnableMask = true);
2966 anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
2967 lri.RegisterOffset = GENX(CACHE_MODE_0_num);
2968 lri.DataDWord = cache_mode;
2969 }
2970
2971 #endif /* GFX_VER == 9 */
2972
2973 /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
2974 * Flush bits is often necessary. We do it regardless because it's easier.
2975 * The render cache flush is also necessary if stencil writes are enabled.
2976 *
2977 * Again, the Skylake docs give a different set of flushes but the BDW
2978 * flushes seem to work just as well.
2979 */
2980 genx_batch_emit_pipe_control
2981 (&cmd_buffer->batch, cmd_buffer->device->info,
2982 cmd_buffer->state.current_pipeline,
2983 ANV_PIPE_DEPTH_STALL_BIT |
2984 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2985 #if GFX_VER >= 12
2986 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
2987 #endif
2988 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
2989 }
2990