1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "anv_private.h"
31
32 #include "genxml/gen_macros.h"
33 #include "genxml/genX_pack.h"
34 #include "common/intel_guardband.h"
35 #include "common/intel_tiled_render.h"
36 #include "compiler/brw_prim.h"
37
38 const uint32_t genX(vk_to_intel_blend)[] = {
39 [VK_BLEND_FACTOR_ZERO] = BLENDFACTOR_ZERO,
40 [VK_BLEND_FACTOR_ONE] = BLENDFACTOR_ONE,
41 [VK_BLEND_FACTOR_SRC_COLOR] = BLENDFACTOR_SRC_COLOR,
42 [VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR] = BLENDFACTOR_INV_SRC_COLOR,
43 [VK_BLEND_FACTOR_DST_COLOR] = BLENDFACTOR_DST_COLOR,
44 [VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR] = BLENDFACTOR_INV_DST_COLOR,
45 [VK_BLEND_FACTOR_SRC_ALPHA] = BLENDFACTOR_SRC_ALPHA,
46 [VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA] = BLENDFACTOR_INV_SRC_ALPHA,
47 [VK_BLEND_FACTOR_DST_ALPHA] = BLENDFACTOR_DST_ALPHA,
48 [VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA] = BLENDFACTOR_INV_DST_ALPHA,
49 [VK_BLEND_FACTOR_CONSTANT_COLOR] = BLENDFACTOR_CONST_COLOR,
50 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR]= BLENDFACTOR_INV_CONST_COLOR,
51 [VK_BLEND_FACTOR_CONSTANT_ALPHA] = BLENDFACTOR_CONST_ALPHA,
52 [VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA]= BLENDFACTOR_INV_CONST_ALPHA,
53 [VK_BLEND_FACTOR_SRC_ALPHA_SATURATE] = BLENDFACTOR_SRC_ALPHA_SATURATE,
54 [VK_BLEND_FACTOR_SRC1_COLOR] = BLENDFACTOR_SRC1_COLOR,
55 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR] = BLENDFACTOR_INV_SRC1_COLOR,
56 [VK_BLEND_FACTOR_SRC1_ALPHA] = BLENDFACTOR_SRC1_ALPHA,
57 [VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA] = BLENDFACTOR_INV_SRC1_ALPHA,
58 };
59
60 static const uint32_t genX(vk_to_intel_blend_op)[] = {
61 [VK_BLEND_OP_ADD] = BLENDFUNCTION_ADD,
62 [VK_BLEND_OP_SUBTRACT] = BLENDFUNCTION_SUBTRACT,
63 [VK_BLEND_OP_REVERSE_SUBTRACT] = BLENDFUNCTION_REVERSE_SUBTRACT,
64 [VK_BLEND_OP_MIN] = BLENDFUNCTION_MIN,
65 [VK_BLEND_OP_MAX] = BLENDFUNCTION_MAX,
66 };
67
68 static void
69 genX(streamout_prologue)(struct anv_cmd_buffer *cmd_buffer)
70 {
71 #if INTEL_WA_16013994831_GFX_VER
72    /* Wa_16013994831 - Disable preemption during streamout and re-enable it
73     * if XFB is not used by the current pipeline.
74 *
75 * Although this workaround applies to Gfx12+, we already disable object
76 * level preemption for another reason in genX_state.c so we can skip this
77 * for Gfx12.
78 */
79 if (!intel_needs_workaround(cmd_buffer->device->info, 16013994831))
80 return;
81
82 struct anv_graphics_pipeline *pipeline =
83 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
84 if (pipeline->uses_xfb) {
85 genX(cmd_buffer_set_preemption)(cmd_buffer, false);
86 return;
87 }
88
89 if (!cmd_buffer->state.gfx.object_preemption)
90 genX(cmd_buffer_set_preemption)(cmd_buffer, true);
91 #endif
92 }
93
94 #if GFX_VER >= 12
95 static uint32_t
96 get_cps_state_offset(struct anv_device *device, bool cps_enabled,
97 const struct vk_fragment_shading_rate_state *fsr)
98 {
99 if (!cps_enabled)
100 return device->cps_states.offset;
101
102 uint32_t offset;
103 static const uint32_t size_index[] = {
104 [1] = 0,
105 [2] = 1,
106 [4] = 2,
107 };
108
109 #if GFX_VERx10 >= 125
110 offset =
111 1 + /* skip disabled */
112 fsr->combiner_ops[0] * 5 * 3 * 3 +
113 fsr->combiner_ops[1] * 3 * 3 +
114 size_index[fsr->fragment_size.width] * 3 +
115 size_index[fsr->fragment_size.height];
116 #else
117 offset =
118 1 + /* skip disabled */
119 size_index[fsr->fragment_size.width] * 3 +
120 size_index[fsr->fragment_size.height];
121 #endif
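   /* Worked example with illustrative values: on GFX_VERx10 >= 125, a state
    * with combiner_ops[] = { 1, 0 } and a 2x2 fragment size maps both
    * dimensions to size_index 1, giving offset = 1 + 1*45 + 0*9 + 1*3 + 1 = 50,
    * i.e. the 50th (0-based) group of MAX_VIEWPORTS CPS_STATE entries in the
    * pre-generated table; the multiply below converts that group index into a
    * byte offset.
    */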
122
123 offset *= MAX_VIEWPORTS * GENX(CPS_STATE_length) * 4;
124
125 return device->cps_states.offset + offset;
126 }
127 #endif /* GFX_VER >= 12 */
128
129 static bool
130 has_ds_feedback_loop(const struct vk_dynamic_graphics_state *dyn)
131 {
132 return dyn->feedback_loops & (VK_IMAGE_ASPECT_DEPTH_BIT |
133 VK_IMAGE_ASPECT_STENCIL_BIT);
134 }
135
136 UNUSED static bool
137 want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer,
138 const struct vk_dynamic_graphics_state *dyn,
139 const struct vk_depth_stencil_state *ds)
140 {
141 if (GFX_VER > 9)
142 return false;
143 assert(GFX_VER == 9);
144
145 /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
146 *
147 * Clearing this bit will force the STC cache to wait for pending
148 * retirement of pixels at the HZ-read stage and do the STC-test for
149 * Non-promoted, R-computed and Computed depth modes instead of
150 * postponing the STC-test to RCPFE.
151 *
152 * STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
153 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
154 *
155 * STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
156 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
157 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
158 *
159 * COMP_STC_EN = STC_TEST_EN &&
160 * 3DSTATE_PS_EXTRA::PixelShaderComputesStencil
161 *
162 * SW parses the pipeline states to generate the following logical
163 * signal indicating if PMA FIX can be enabled.
164 *
165 * STC_PMA_OPT =
166 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
167 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
168 * 3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
169 * 3DSTATE_DEPTH_BUFFER::HIZ Enable &&
170 * !(3DSTATE_WM::EDSC_Mode == 2) &&
171 * 3DSTATE_PS_EXTRA::PixelShaderValid &&
172 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
173 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
174 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
175 * 3DSTATE_WM_HZ_OP::StencilBufferClear) &&
176 * (COMP_STC_EN || STC_WRITE_EN) &&
177 * ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
178 * 3DSTATE_WM::ForceKillPix == ON ||
179 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
180 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
181 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
182 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
183 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
184 */
185
186 /* These are always true:
187 * 3DSTATE_WM::ForceThreadDispatch != 1 &&
188 * !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
189 */
190
191 /* We only enable the PMA fix if we know for certain that HiZ is enabled.
192 * If we don't know whether HiZ is enabled or not, we disable the PMA fix
193 * and there is no harm.
194 *
195 * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
196 * 3DSTATE_DEPTH_BUFFER::HIZ Enable
197 */
198 if (!cmd_buffer->state.hiz_enabled)
199 return false;
200
201 /* We can't possibly know if HiZ is enabled without the depth attachment */
202 ASSERTED const struct anv_image_view *d_iview =
203 cmd_buffer->state.gfx.depth_att.iview;
204 assert(d_iview && d_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
205
206 /* 3DSTATE_PS_EXTRA::PixelShaderValid */
207 struct anv_graphics_pipeline *pipeline =
208 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
209 if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
210 return false;
211
212 /* !(3DSTATE_WM::EDSC_Mode == 2) */
213 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
214 if (wm_prog_data->early_fragment_tests)
215 return false;
216
217 /* We never use anv_pipeline for HiZ ops so this is trivially true:
218 * !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
219 * 3DSTATE_WM_HZ_OP::DepthBufferResolve ||
220 * 3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
221 * 3DSTATE_WM_HZ_OP::StencilBufferClear)
222 */
223
224 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
225 * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
226 */
227 const bool stc_test_en = ds->stencil.test_enable;
228
229 /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
230 * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
231 * 3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
232 */
233 const bool stc_write_en = ds->stencil.write_enable;
234
235 /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
236 const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
237
238 /* COMP_STC_EN || STC_WRITE_EN */
239 if (!(comp_stc_en || stc_write_en))
240 return false;
241
242 /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
243 * 3DSTATE_WM::ForceKillPix == ON ||
244 * 3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
245 * 3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
246 * 3DSTATE_PS_BLEND::AlphaTestEnable ||
247 * 3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
248 * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
249 */
250 return pipeline->kill_pixel ||
251 pipeline->rp_has_ds_self_dep ||
252 has_ds_feedback_loop(dyn) ||
253 wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
254 }
255
256 static void
257 genX(rasterization_mode)(VkPolygonMode raster_mode,
258 VkLineRasterizationModeKHR line_mode,
259 float line_width,
260 uint32_t *api_mode,
261 bool *msaa_rasterization_enable)
262 {
263 if (raster_mode == VK_POLYGON_MODE_LINE) {
264 /* Unfortunately, configuring our line rasterization hardware on gfx8
265 * and later is rather painful. Instead of giving us bits to tell the
266 * hardware what line mode to use like we had on gfx7, we now have an
267 * arcane combination of API Mode and MSAA enable bits which do things
268 * in a table which are expected to magically put the hardware into the
269 * right mode for your API. Sadly, Vulkan isn't any of the APIs the
270 * hardware people thought of so nothing works the way you want it to.
271 *
272 * Look at the table titled "Multisample Rasterization Modes" in Vol 7
273 * of the Skylake PRM for more details.
274 */
275 switch (line_mode) {
276 case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT:
277 *api_mode = DX101;
278 #if GFX_VER <= 9
279 /* Prior to ICL, the algorithm the HW uses to draw wide lines
280 * doesn't quite match what the CTS expects, at least for rectangular
281 * lines, so we set this to false here, making it draw parallelograms
282 * instead, which work well enough.
283 */
284 *msaa_rasterization_enable = line_width < 1.0078125;
285 #else
286 *msaa_rasterization_enable = true;
287 #endif
288 break;
289
290 case VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT:
291 case VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT:
292 *api_mode = DX9OGL;
293 *msaa_rasterization_enable = false;
294 break;
295
296 default:
297 unreachable("Unsupported line rasterization mode");
298 }
299 } else {
300 *api_mode = DX101;
301 *msaa_rasterization_enable = true;
302 }
303 }
304
305 #if GFX_VERx10 == 125
306 /**
307 * Return the dimensions of the current rendering area, defined as the
308 * bounding box of all present color, depth and stencil attachments.
309 */
310 UNUSED static bool
311 calculate_render_area(struct anv_cmd_buffer *cmd_buffer,
312 unsigned *width, unsigned *height)
313 {
314 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
315
316 *width = gfx->render_area.offset.x + gfx->render_area.extent.width;
317 *height = gfx->render_area.offset.y + gfx->render_area.extent.height;
318
319 for (unsigned i = 0; i < gfx->color_att_count; i++) {
320 struct anv_attachment *att = &gfx->color_att[i];
321 if (att->iview) {
322 *width = MAX2(*width, att->iview->vk.extent.width);
323 *height = MAX2(*height, att->iview->vk.extent.height);
324 }
325 }
326
327 const struct anv_image_view *const z_view = gfx->depth_att.iview;
328 if (z_view) {
329 *width = MAX2(*width, z_view->vk.extent.width);
330 *height = MAX2(*height, z_view->vk.extent.height);
331 }
332
333 const struct anv_image_view *const s_view = gfx->stencil_att.iview;
334 if (s_view) {
335 *width = MAX2(*width, s_view->vk.extent.width);
336 *height = MAX2(*height, s_view->vk.extent.height);
337 }
338
339 return *width && *height;
340 }
341
342 /* Calculate TBIMR tiling parameters adequate for the current pipeline
343 * setup. Return true if TBIMR should be enabled.
344 */
345 UNUSED static bool
346 calculate_tile_dimensions(struct anv_cmd_buffer *cmd_buffer,
347 unsigned fb_width, unsigned fb_height,
348 unsigned *tile_width, unsigned *tile_height)
349 {
350 const struct anv_device *device = cmd_buffer->device;
351 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
352 const unsigned aux_scale = 256;
353 unsigned pixel_size = 0;
354
355 /* Perform a rough calculation of the tile cache footprint of the
356 * pixel pipeline, approximating it as the sum of the amount of
357 * memory used per pixel by every render target, depth, stencil and
358 * auxiliary surfaces bound to the pipeline.
359 */
360 for (uint32_t i = 0; i < gfx->color_att_count; i++) {
361 struct anv_attachment *att = &gfx->color_att[i];
362
363 if (att->iview) {
364 const struct anv_image *image = att->iview->image;
365 const unsigned p = anv_image_aspect_to_plane(image,
366 VK_IMAGE_ASPECT_COLOR_BIT);
367 const struct anv_image_plane *plane = &image->planes[p];
368
369 pixel_size += intel_calculate_surface_pixel_size(
370 &plane->primary_surface.isl);
371
372 if (isl_aux_usage_has_mcs(att->aux_usage))
373 pixel_size += intel_calculate_surface_pixel_size(
374 &plane->aux_surface.isl);
375
376 /* XXX - Use proper implicit CCS surface metadata tracking
377 * instead of inferring pixel size from primary
378 * surface.
379 */
380 if (isl_aux_usage_has_ccs(att->aux_usage))
381 pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
382 &plane->primary_surface.isl),
383 aux_scale);
384 }
385 }
386
387 const struct anv_image_view *const z_view = gfx->depth_att.iview;
388 if (z_view) {
389 const struct anv_image *image = z_view->image;
390 assert(image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT);
391 const unsigned p = anv_image_aspect_to_plane(image,
392 VK_IMAGE_ASPECT_DEPTH_BIT);
393 const struct anv_image_plane *plane = &image->planes[p];
394
395 pixel_size += intel_calculate_surface_pixel_size(
396 &plane->primary_surface.isl);
397
398 if (isl_aux_usage_has_hiz(image->planes[p].aux_usage))
399 pixel_size += intel_calculate_surface_pixel_size(
400 &plane->aux_surface.isl);
401
402 /* XXX - Use proper implicit CCS surface metadata tracking
403 * instead of inferring pixel size from primary
404 * surface.
405 */
406 if (isl_aux_usage_has_ccs(image->planes[p].aux_usage))
407 pixel_size += DIV_ROUND_UP(intel_calculate_surface_pixel_size(
408 &plane->primary_surface.isl),
409 aux_scale);
410 }
411
412    const struct anv_image_view *const s_view = gfx->stencil_att.iview;
413 if (s_view && s_view != z_view) {
414 const struct anv_image *image = s_view->image;
415 assert(image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT);
416 const unsigned p = anv_image_aspect_to_plane(image,
417 VK_IMAGE_ASPECT_STENCIL_BIT);
418 const struct anv_image_plane *plane = &image->planes[p];
419
420 pixel_size += intel_calculate_surface_pixel_size(
421 &plane->primary_surface.isl);
422 }
423
424 if (!pixel_size)
425 return false;
426
427 /* Compute a tile layout that allows reasonable utilization of the
428 * tile cache based on the per-pixel cache footprint estimated
429 * above.
430 */
431 intel_calculate_tile_dimensions(device->info, cmd_buffer->state.current_l3_config,
432 32, 32, fb_width, fb_height,
433 pixel_size, tile_width, tile_height);
434
435 /* Perform TBIMR tile passes only if the framebuffer covers more
436 * than a single tile.
437 */
438 return *tile_width < fb_width || *tile_height < fb_height;
439 }
440 #endif
441
442 /**
443  * This function takes the Vulkan runtime values & dirty states and updates
444  * the values in anv_gfx_dynamic_state, flagging HW instructions for
445  * re-emission when the values change.
446 *
447 * Nothing is emitted in the batch buffer.
448 */
449 void
450 genX(cmd_buffer_flush_gfx_runtime_state)(struct anv_cmd_buffer *cmd_buffer)
451 {
452 UNUSED struct anv_device *device = cmd_buffer->device;
453 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
454 const struct anv_graphics_pipeline *pipeline =
455 anv_pipeline_to_graphics(gfx->base.pipeline);
456 const struct vk_dynamic_graphics_state *dyn =
457 &cmd_buffer->vk.dynamic_graphics_state;
458 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
459 struct anv_instance *instance = cmd_buffer->device->physical->instance;
460
461 #define GET(field) hw_state->field
462 #define SET(bit, field, value) \
463 do { \
464 __typeof(hw_state->field) __v = value; \
465 if (hw_state->field != __v) { \
466 hw_state->field = __v; \
467 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
468 } \
469 } while (0)
470 #define SET_STAGE(bit, field, value, stage) \
471 do { \
472 __typeof(hw_state->field) __v = value; \
473 if (!anv_pipeline_has_stage(pipeline, \
474 MESA_SHADER_##stage)) { \
475 hw_state->field = __v; \
476 break; \
477 } \
478 if (hw_state->field != __v) { \
479 hw_state->field = __v; \
480 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_##bit); \
481 } \
482 } while (0)
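/* For example, SET(STREAMOUT, so.RenderingDisable, value) compares the cached
 * hw_state->so.RenderingDisable against the new value and, only when it
 * differs, updates the cache and flags ANV_GFX_STATE_STREAMOUT in
 * hw_state->dirty so the corresponding instruction gets re-emitted by the
 * emission pass. SET_STAGE does the same, but only updates the cached value
 * without flagging dirty when the pipeline lacks the given shader stage.
 */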
483
484 #define SETUP_PROVOKING_VERTEX(bit, cmd, mode) \
485 switch (mode) { \
486 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT: \
487 SET(bit, cmd.TriangleStripListProvokingVertexSelect, 0); \
488 SET(bit, cmd.LineStripListProvokingVertexSelect, 0); \
489 SET(bit, cmd.TriangleFanProvokingVertexSelect, 1); \
490 break; \
491 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT: \
492 SET(bit, cmd.TriangleStripListProvokingVertexSelect, 2); \
493 SET(bit, cmd.LineStripListProvokingVertexSelect, 1); \
494 SET(bit, cmd.TriangleFanProvokingVertexSelect, 2); \
495 break; \
496 default: \
497 unreachable("Invalid provoking vertex mode"); \
498 } \
499
500 if ((gfx->dirty & (ANV_CMD_DIRTY_PIPELINE |
501 ANV_CMD_DIRTY_XFB_ENABLE |
502 ANV_CMD_DIRTY_OCCLUSION_QUERY_ACTIVE)) ||
503 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZER_DISCARD_ENABLE) ||
504 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_RASTERIZATION_STREAM) ||
505 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
506 SET(STREAMOUT, so.RenderingDisable, dyn->rs.rasterizer_discard_enable);
507 SET(STREAMOUT, so.RenderStreamSelect, dyn->rs.rasterization_stream);
508
509 #if INTEL_NEEDS_WA_18022508906
510 /* Wa_18022508906 :
511 *
512 * SKL PRMs, Volume 7: 3D-Media-GPGPU, Stream Output Logic (SOL) Stage:
513 *
514 * SOL_INT::Render_Enable =
515 * (3DSTATE_STREAMOUT::Force_Rending == Force_On) ||
516 * (
517 * (3DSTATE_STREAMOUT::Force_Rending != Force_Off) &&
518 * !(3DSTATE_GS::Enable && 3DSTATE_GS::Output Vertex Size == 0) &&
519 * !3DSTATE_STREAMOUT::API_Render_Disable &&
520 * (
521 * 3DSTATE_DEPTH_STENCIL_STATE::Stencil_TestEnable ||
522 * 3DSTATE_DEPTH_STENCIL_STATE::Depth_TestEnable ||
523 * 3DSTATE_DEPTH_STENCIL_STATE::Depth_WriteEnable ||
524 * 3DSTATE_PS_EXTRA::PS_Valid ||
525 * 3DSTATE_WM::Legacy Depth_Buffer_Clear ||
526 * 3DSTATE_WM::Legacy Depth_Buffer_Resolve_Enable ||
527 * 3DSTATE_WM::Legacy Hierarchical_Depth_Buffer_Resolve_Enable
528 * )
529 * )
530 *
531 * If SOL_INT::Render_Enable is false, the SO stage will not forward any
532 * topologies down the pipeline. Which is not what we want for occlusion
533 * queries.
534 *
535 * Here we force rendering to get SOL_INT::Render_Enable when occlusion
536 * queries are active.
537 */
538 SET(STREAMOUT, so.ForceRendering,
539 (!GET(so.RenderingDisable) && gfx->n_occlusion_queries > 0) ?
540 Force_on : 0);
541 #endif
542
543 switch (dyn->rs.provoking_vertex) {
544 case VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT:
545 SET(STREAMOUT, so.ReorderMode, LEADING);
546 SET_STAGE(GS, gs.ReorderMode, LEADING, GEOMETRY);
547 break;
548
549 case VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT:
550 SET(STREAMOUT, so.ReorderMode, TRAILING);
551 SET_STAGE(GS, gs.ReorderMode, TRAILING, GEOMETRY);
552 break;
553
554 default:
555 unreachable("Invalid provoking vertex mode");
556 }
557 }
558
559 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
560 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY)) {
561 uint32_t topology;
562 if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
563 topology = _3DPRIM_PATCHLIST(dyn->ts.patch_control_points);
564 else
565 topology = genX(vk_to_intel_primitive_type)[dyn->ia.primitive_topology];
566
567 gfx->primitive_topology = topology;
568
569 SET(VF_TOPOLOGY, vft.PrimitiveTopologyType, topology);
570 }
571
572 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
573 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI) ||
574 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDINGS_VALID) ||
575 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VI_BINDING_STRIDES))
576 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
577
578 #if GFX_VER >= 11
579 if (cmd_buffer->device->vk.enabled_extensions.KHR_fragment_shading_rate &&
580 (gfx->dirty & ANV_CMD_DIRTY_PIPELINE ||
581 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_FSR))) {
582 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
583 const bool cps_enable = wm_prog_data &&
584 brw_wm_prog_data_is_coarse(wm_prog_data, pipeline->fs_msaa_flags);
585 #if GFX_VER == 11
586 SET(CPS, cps.CoarsePixelShadingMode,
587 cps_enable ? CPS_MODE_CONSTANT : CPS_MODE_NONE);
588 SET(CPS, cps.MinCPSizeX, dyn->fsr.fragment_size.width);
589 SET(CPS, cps.MinCPSizeY, dyn->fsr.fragment_size.height);
590 #elif GFX_VER >= 12
591 SET(CPS, cps.CoarsePixelShadingStateArrayPointer,
592 get_cps_state_offset(device, cps_enable, &dyn->fsr));
593 #endif
594 }
595 #endif /* GFX_VER >= 11 */
596
597 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
598 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_TS_DOMAIN_ORIGIN)) {
599 const struct brw_tes_prog_data *tes_prog_data = get_tes_prog_data(pipeline);
600
601 if (tes_prog_data && anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) {
602 if (dyn->ts.domain_origin == VK_TESSELLATION_DOMAIN_ORIGIN_LOWER_LEFT) {
603 SET(TE, te.OutputTopology, tes_prog_data->output_topology);
604 } else {
605 /* When the origin is upper-left, we have to flip the winding order */
606 if (tes_prog_data->output_topology == OUTPUT_TRI_CCW) {
607 SET(TE, te.OutputTopology, OUTPUT_TRI_CW);
608 } else if (tes_prog_data->output_topology == OUTPUT_TRI_CW) {
609 SET(TE, te.OutputTopology, OUTPUT_TRI_CCW);
610 } else {
611 SET(TE, te.OutputTopology, tes_prog_data->output_topology);
612 }
613 }
614 } else {
615 SET(TE, te.OutputTopology, OUTPUT_POINT);
616 }
617 }
618
619 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH))
620 SET(SF, sf.LineWidth, dyn->rs.line.width);
621
622 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_PROVOKING_VERTEX)) {
623 SETUP_PROVOKING_VERTEX(SF, sf, dyn->rs.provoking_vertex);
624 SETUP_PROVOKING_VERTEX(CLIP, clip, dyn->rs.provoking_vertex);
625 }
626
627 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS)) {
628 /**
629 * From the Vulkan Spec:
630 *
631 * "VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT specifies that the depth
632 * bias representation is a factor of constant r equal to 1."
633 *
634 * From the SKL PRMs, Volume 7: 3D-Media-GPGPU, Depth Offset:
635 *
636 * "When UNORM Depth Buffer is at Output Merger (or no Depth Buffer):
637 *
638 * Bias = GlobalDepthOffsetConstant * r + GlobalDepthOffsetScale * MaxDepthSlope
639 *
640 * Where r is the minimum representable value > 0 in the depth
641 * buffer format, converted to float32 (note: If state bit Legacy
642 * Global Depth Bias Enable is set, the r term will be forced to
643 * 1.0)"
644 *
645 * When VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT is set, enable
646 * LegacyGlobalDepthBiasEnable.
647 */
648 SET(SF, sf.LegacyGlobalDepthBiasEnable,
649 dyn->rs.depth_bias.representation ==
650 VK_DEPTH_BIAS_REPRESENTATION_FLOAT_EXT);
651 }
652
653 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE))
654 SET(CLIP, clip.APIMode, dyn->vp.depth_clip_negative_one_to_one ? APIMODE_OGL : APIMODE_D3D);
655
656 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
657 (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
658 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_TOPOLOGY) ||
659 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CULL_MODE) ||
660 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_FRONT_FACE) ||
661 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_ENABLE) ||
662 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_BIAS_FACTORS) ||
663 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_POLYGON_MODE) ||
664 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_MODE) ||
665 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_WIDTH) ||
666 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLIP_ENABLE) ||
667 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
668 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_CONSERVATIVE_MODE)) {
669       /* Take dynamic primitive topology into account with
670 * 3DSTATE_RASTER::APIMode
671 * 3DSTATE_RASTER::DXMultisampleRasterizationEnable
672 * 3DSTATE_RASTER::AntialiasingEnable
673 */
674 uint32_t api_mode = 0;
675 bool msaa_raster_enable = false;
676
677 const VkLineRasterizationModeKHR line_mode =
678 anv_line_rasterization_mode(dyn->rs.line.mode,
679 pipeline->rasterization_samples);
680
681 const VkPolygonMode dynamic_raster_mode =
682 genX(raster_polygon_mode)(pipeline,
683 dyn->rs.polygon_mode,
684 dyn->ia.primitive_topology);
685
686 genX(rasterization_mode)(dynamic_raster_mode,
687 line_mode, dyn->rs.line.width,
688 &api_mode, &msaa_raster_enable);
689
690       /* From the Broadwell PRM, Volume 2, documentation for
691 * 3DSTATE_RASTER, "Antialiasing Enable":
692 *
693 * "This field must be disabled if any of the render targets
694 * have integer (UINT or SINT) surface format."
695 *
696 * Additionally internal documentation for Gfx12+ states:
697 *
698 * "This bit MUST not be set when NUM_MULTISAMPLES > 1 OR
699 * FORCED_SAMPLE_COUNT > 1."
700 */
701 const bool aa_enable =
702 anv_rasterization_aa_mode(dynamic_raster_mode, line_mode) &&
703 !gfx->has_uint_rt &&
704 !(GFX_VER >= 12 && gfx->samples > 1);
705
706 const bool depth_clip_enable =
707 vk_rasterization_state_depth_clip_enable(&dyn->rs);
708
709 const bool xy_clip_test_enable =
710 (dynamic_raster_mode == VK_POLYGON_MODE_FILL);
711
712 SET(CLIP, clip.ViewportXYClipTestEnable, xy_clip_test_enable);
713
714 SET(RASTER, raster.APIMode, api_mode);
715 SET(RASTER, raster.DXMultisampleRasterizationEnable, msaa_raster_enable);
716 SET(RASTER, raster.AntialiasingEnable, aa_enable);
717 SET(RASTER, raster.CullMode, genX(vk_to_intel_cullmode)[dyn->rs.cull_mode]);
718 SET(RASTER, raster.FrontWinding, genX(vk_to_intel_front_face)[dyn->rs.front_face]);
719 SET(RASTER, raster.GlobalDepthOffsetEnableSolid, dyn->rs.depth_bias.enable);
720 SET(RASTER, raster.GlobalDepthOffsetEnableWireframe, dyn->rs.depth_bias.enable);
721 SET(RASTER, raster.GlobalDepthOffsetEnablePoint, dyn->rs.depth_bias.enable);
722 SET(RASTER, raster.GlobalDepthOffsetConstant, dyn->rs.depth_bias.constant);
723 SET(RASTER, raster.GlobalDepthOffsetScale, dyn->rs.depth_bias.slope);
724 SET(RASTER, raster.GlobalDepthOffsetClamp, dyn->rs.depth_bias.clamp);
725 SET(RASTER, raster.FrontFaceFillMode, genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode]);
726 SET(RASTER, raster.BackFaceFillMode, genX(vk_to_intel_fillmode)[dyn->rs.polygon_mode]);
727 SET(RASTER, raster.ViewportZFarClipTestEnable, depth_clip_enable);
728 SET(RASTER, raster.ViewportZNearClipTestEnable, depth_clip_enable);
729 SET(RASTER, raster.ConservativeRasterizationEnable,
730 dyn->rs.conservative_mode !=
731 VK_CONSERVATIVE_RASTERIZATION_MODE_DISABLED_EXT);
732 }
733
734 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_MASK)) {
735 /* From the Vulkan 1.0 spec:
736 * If pSampleMask is NULL, it is treated as if the mask has all bits
737 * enabled, i.e. no coverage is removed from fragments.
738 *
739 * 3DSTATE_SAMPLE_MASK.SampleMask is 16 bits.
740 */
741 SET(SAMPLE_MASK, sm.SampleMask, dyn->ms.sample_mask & 0xffff);
742 }
743
744 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
745 #if GFX_VER == 9
746 /* For the PMA fix */
747 (gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
748 #endif
749 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_TEST_ENABLE) ||
750 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_WRITE_ENABLE) ||
751 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_COMPARE_OP) ||
752 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_TEST_ENABLE) ||
753 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_OP) ||
754 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_COMPARE_MASK) ||
755 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_WRITE_MASK) ||
756 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_STENCIL_REFERENCE)) {
757 VkImageAspectFlags ds_aspects = 0;
758 if (gfx->depth_att.vk_format != VK_FORMAT_UNDEFINED)
759 ds_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
760 if (gfx->stencil_att.vk_format != VK_FORMAT_UNDEFINED)
761 ds_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
762
763 struct vk_depth_stencil_state opt_ds = dyn->ds;
764 vk_optimize_depth_stencil_state(&opt_ds, ds_aspects, true);
765
766 SET(WM_DEPTH_STENCIL, ds.DoubleSidedStencilEnable, true);
767
768 SET(WM_DEPTH_STENCIL, ds.StencilTestMask,
769 opt_ds.stencil.front.compare_mask & 0xff);
770 SET(WM_DEPTH_STENCIL, ds.StencilWriteMask,
771 opt_ds.stencil.front.write_mask & 0xff);
772
773 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestMask, opt_ds.stencil.back.compare_mask & 0xff);
774 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilWriteMask, opt_ds.stencil.back.write_mask & 0xff);
775
776 SET(WM_DEPTH_STENCIL, ds.StencilReferenceValue,
777 opt_ds.stencil.front.reference & 0xff);
778 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilReferenceValue,
779 opt_ds.stencil.back.reference & 0xff);
780
781 SET(WM_DEPTH_STENCIL, ds.DepthTestEnable, opt_ds.depth.test_enable);
782 SET(WM_DEPTH_STENCIL, ds.DepthBufferWriteEnable, opt_ds.depth.write_enable);
783 SET(WM_DEPTH_STENCIL, ds.DepthTestFunction,
784 genX(vk_to_intel_compare_op)[opt_ds.depth.compare_op]);
785 SET(WM_DEPTH_STENCIL, ds.StencilTestEnable, opt_ds.stencil.test_enable);
786 SET(WM_DEPTH_STENCIL, ds.StencilBufferWriteEnable, opt_ds.stencil.write_enable);
787 SET(WM_DEPTH_STENCIL, ds.StencilFailOp,
788 genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.fail]);
789 SET(WM_DEPTH_STENCIL, ds.StencilPassDepthPassOp,
790 genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.pass]);
791 SET(WM_DEPTH_STENCIL, ds.StencilPassDepthFailOp,
792 genX(vk_to_intel_stencil_op)[opt_ds.stencil.front.op.depth_fail]);
793 SET(WM_DEPTH_STENCIL, ds.StencilTestFunction,
794 genX(vk_to_intel_compare_op)[opt_ds.stencil.front.op.compare]);
795 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilFailOp,
796 genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.fail]);
797 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthPassOp,
798 genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.pass]);
799 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilPassDepthFailOp,
800 genX(vk_to_intel_stencil_op)[opt_ds.stencil.back.op.depth_fail]);
801 SET(WM_DEPTH_STENCIL, ds.BackfaceStencilTestFunction,
802 genX(vk_to_intel_compare_op)[opt_ds.stencil.back.op.compare]);
803
804 #if GFX_VER == 9
805 const bool pma = want_stencil_pma_fix(cmd_buffer, dyn, &opt_ds);
806 SET(PMA_FIX, pma_fix, pma);
807 #endif
808
809 #if GFX_VERx10 >= 125
810 if (intel_needs_workaround(cmd_buffer->device->info, 18019816803)) {
811 bool ds_write_state = opt_ds.depth.write_enable || opt_ds.stencil.write_enable;
812 if (gfx->ds_write_state != ds_write_state) {
813 gfx->ds_write_state = ds_write_state;
814 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_WA_18019816803);
815 }
816 }
817 #endif
818 }
819
820 #if GFX_VER >= 12
821 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_ENABLE) ||
822 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_DS_DEPTH_BOUNDS_TEST_BOUNDS)) {
823 SET(DEPTH_BOUNDS, db.DepthBoundsTestEnable, dyn->ds.depth.bounds_test.enable);
824 /* Only look at updating the bounds if testing is enabled */
825 if (dyn->ds.depth.bounds_test.enable) {
826 SET(DEPTH_BOUNDS, db.DepthBoundsTestMinValue, dyn->ds.depth.bounds_test.min);
827 SET(DEPTH_BOUNDS, db.DepthBoundsTestMaxValue, dyn->ds.depth.bounds_test.max);
828 }
829 }
830 #endif
831
832 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE) ||
833 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_LINE_STIPPLE_ENABLE)) {
834 SET(LINE_STIPPLE, ls.LineStipplePattern, dyn->rs.line.stipple.pattern);
835 SET(LINE_STIPPLE, ls.LineStippleInverseRepeatCount,
836 1.0f / MAX2(1, dyn->rs.line.stipple.factor));
837 SET(LINE_STIPPLE, ls.LineStippleRepeatCount, dyn->rs.line.stipple.factor);
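      /* Illustrative values: a stipple factor of 4 programs
       * LineStippleRepeatCount = 4 and LineStippleInverseRepeatCount = 0.25.
       */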
838
839 SET(WM, wm.LineStippleEnable, dyn->rs.line.stipple.enable);
840 }
841
842 if ((gfx->dirty & ANV_CMD_DIRTY_RESTART_INDEX) ||
843 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE)) {
844 SET(VF, vf.IndexedDrawCutIndexEnable, dyn->ia.primitive_restart_enable);
845 SET(VF, vf.CutIndex, gfx->restart_index);
846 }
847
848 if (gfx->dirty & ANV_CMD_DIRTY_INDEX_BUFFER)
849 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER);
850
851 #if GFX_VERx10 >= 125
852 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_IA_PRIMITIVE_RESTART_ENABLE))
853 SET(VFG, vfg.ListCutIndexEnable, dyn->ia.primitive_restart_enable);
854 #endif
855
856 if (cmd_buffer->device->vk.enabled_extensions.EXT_sample_locations &&
857 (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS) ||
858 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_SAMPLE_LOCATIONS_ENABLE)))
859 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN);
860
861 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
862 (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
863 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES)) {
864       /* 3DSTATE_WM in the hope we can avoid spawning fragment shader
865 * threads.
866 */
867 bool force_thread_dispatch =
868 anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
869 (pipeline->force_fragment_thread_dispatch ||
870 anv_cmd_buffer_all_color_write_masked(cmd_buffer));
871 SET(WM, wm.ForceThreadDispatchEnable, force_thread_dispatch ? ForceON : 0);
872 }
873
874 if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_PIPELINE) ||
875 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_ATTACHMENT_FEEDBACK_LOOP_ENABLE)) {
876 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
877
878 SET_STAGE(PS_EXTRA, ps_extra.PixelShaderKillsPixel,
879 wm_prog_data && (pipeline->rp_has_ds_self_dep ||
880 has_ds_feedback_loop(dyn) ||
881 wm_prog_data->uses_kill),
882 FRAGMENT);
883 }
884
885 if ((gfx->dirty & ANV_CMD_DIRTY_PIPELINE) ||
886 (gfx->dirty & ANV_CMD_DIRTY_RENDER_TARGETS) ||
887 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP) ||
888 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_COLOR_WRITE_ENABLES) ||
889 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_LOGIC_OP_ENABLE) ||
890 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_MS_ALPHA_TO_ONE_ENABLE) ||
891 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_WRITE_MASKS) ||
892 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_ENABLES) ||
893 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_EQUATIONS)) {
894 const uint8_t color_writes = dyn->cb.color_write_enables;
895 const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
896 bool has_writeable_rt =
897 anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT) &&
898 (color_writes & ((1u << gfx->color_att_count) - 1)) != 0;
899
900 SET(BLEND_STATE, blend.AlphaToCoverageEnable,
901 dyn->ms.alpha_to_coverage_enable);
902 SET(BLEND_STATE, blend.AlphaToOneEnable,
903 dyn->ms.alpha_to_one_enable);
904
905 bool independent_alpha_blend = false;
906 /* Wa_14018912822, check if we set these during RT setup. */
907 bool color_blend_zero = false;
908 bool alpha_blend_zero = false;
909 for (uint32_t i = 0; i < MAX_RTS; i++) {
910 /* Disable anything above the current number of color attachments. */
911 bool write_disabled = i >= gfx->color_att_count ||
912 (color_writes & BITFIELD_BIT(i)) == 0;
913
914 SET(BLEND_STATE, blend.rts[i].WriteDisableAlpha,
915 write_disabled ||
916 (dyn->cb.attachments[i].write_mask &
917 VK_COLOR_COMPONENT_A_BIT) == 0);
918 SET(BLEND_STATE, blend.rts[i].WriteDisableRed,
919 write_disabled ||
920 (dyn->cb.attachments[i].write_mask &
921 VK_COLOR_COMPONENT_R_BIT) == 0);
922 SET(BLEND_STATE, blend.rts[i].WriteDisableGreen,
923 write_disabled ||
924 (dyn->cb.attachments[i].write_mask &
925 VK_COLOR_COMPONENT_G_BIT) == 0);
926 SET(BLEND_STATE, blend.rts[i].WriteDisableBlue,
927 write_disabled ||
928 (dyn->cb.attachments[i].write_mask &
929 VK_COLOR_COMPONENT_B_BIT) == 0);
930 /* Vulkan specification 1.2.168, VkLogicOp:
931 *
932 * "Logical operations are controlled by the logicOpEnable and
933 * logicOp members of VkPipelineColorBlendStateCreateInfo. If
934 * logicOpEnable is VK_TRUE, then a logical operation selected by
935 * logicOp is applied between each color attachment and the
936 * fragment’s corresponding output value, and blending of all
937 * attachments is treated as if it were disabled."
938 *
939 * From the Broadwell PRM Volume 2d: Command Reference: Structures:
940 * BLEND_STATE_ENTRY:
941 *
942 * "Enabling LogicOp and Color Buffer Blending at the same time is
943 * UNDEFINED"
944 */
945 SET(BLEND_STATE, blend.rts[i].LogicOpFunction,
946 genX(vk_to_intel_logic_op)[dyn->cb.logic_op]);
947 SET(BLEND_STATE, blend.rts[i].LogicOpEnable, dyn->cb.logic_op_enable);
948
949 SET(BLEND_STATE, blend.rts[i].ColorClampRange, COLORCLAMP_RTFORMAT);
950 SET(BLEND_STATE, blend.rts[i].PreBlendColorClampEnable, true);
951 SET(BLEND_STATE, blend.rts[i].PostBlendColorClampEnable, true);
952
953 /* Setup blend equation. */
954 SET(BLEND_STATE, blend.rts[i].ColorBlendFunction,
955 genX(vk_to_intel_blend_op)[
956 dyn->cb.attachments[i].color_blend_op]);
957 SET(BLEND_STATE, blend.rts[i].AlphaBlendFunction,
958 genX(vk_to_intel_blend_op)[
959 dyn->cb.attachments[i].alpha_blend_op]);
960
961 if (dyn->cb.attachments[i].src_color_blend_factor !=
962 dyn->cb.attachments[i].src_alpha_blend_factor ||
963 dyn->cb.attachments[i].dst_color_blend_factor !=
964 dyn->cb.attachments[i].dst_alpha_blend_factor ||
965 dyn->cb.attachments[i].color_blend_op !=
966 dyn->cb.attachments[i].alpha_blend_op) {
967 independent_alpha_blend = true;
968 }
969
970 /* The Dual Source Blending documentation says:
971 *
972 * "If SRC1 is included in a src/dst blend factor and
973 * a DualSource RT Write message is not used, results
974 * are UNDEFINED. (This reflects the same restriction in DX APIs,
975 * where undefined results are produced if “o1” is not written
976 * by a PS – there are no default values defined)."
977 *
978 * There is no way to gracefully fix this undefined situation
979 * so we just disable the blending to prevent possible issues.
980 */
981 if (wm_prog_data && !wm_prog_data->dual_src_blend &&
982 anv_is_dual_src_blend_equation(&dyn->cb.attachments[i])) {
983 SET(BLEND_STATE, blend.rts[i].ColorBufferBlendEnable, false);
984 } else {
985 SET(BLEND_STATE, blend.rts[i].ColorBufferBlendEnable,
986 !dyn->cb.logic_op_enable &&
987 dyn->cb.attachments[i].blend_enable);
988 }
989
990 /* Our hardware applies the blend factor prior to the blend function
991 * regardless of what function is used. Technically, this means the
992 * hardware can do MORE than GL or Vulkan specify. However, it also
993 * means that, for MIN and MAX, we have to stomp the blend factor to
994 * ONE to make it a no-op.
995 */
996 uint32_t SourceBlendFactor;
997 uint32_t DestinationBlendFactor;
998 uint32_t SourceAlphaBlendFactor;
999 uint32_t DestinationAlphaBlendFactor;
1000 if (dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MIN ||
1001 dyn->cb.attachments[i].color_blend_op == VK_BLEND_OP_MAX) {
1002 SourceBlendFactor = BLENDFACTOR_ONE;
1003 DestinationBlendFactor = BLENDFACTOR_ONE;
1004 } else {
1005 SourceBlendFactor = genX(vk_to_intel_blend)[
1006 dyn->cb.attachments[i].src_color_blend_factor];
1007 DestinationBlendFactor = genX(vk_to_intel_blend)[
1008 dyn->cb.attachments[i].dst_color_blend_factor];
1009 }
1010
1011 if (dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MIN ||
1012 dyn->cb.attachments[i].alpha_blend_op == VK_BLEND_OP_MAX) {
1013 SourceAlphaBlendFactor = BLENDFACTOR_ONE;
1014 DestinationAlphaBlendFactor = BLENDFACTOR_ONE;
1015 } else {
1016 SourceAlphaBlendFactor = genX(vk_to_intel_blend)[
1017 dyn->cb.attachments[i].src_alpha_blend_factor];
1018 DestinationAlphaBlendFactor = genX(vk_to_intel_blend)[
1019 dyn->cb.attachments[i].dst_alpha_blend_factor];
1020 }
1021
1022 if (instance->intel_enable_wa_14018912822 &&
1023 intel_needs_workaround(cmd_buffer->device->info, 14018912822) &&
1024 pipeline->rasterization_samples > 1) {
1025 if (DestinationBlendFactor == BLENDFACTOR_ZERO) {
1026 DestinationBlendFactor = BLENDFACTOR_CONST_COLOR;
1027 color_blend_zero = true;
1028 }
1029 if (DestinationAlphaBlendFactor == BLENDFACTOR_ZERO) {
1030 DestinationAlphaBlendFactor = BLENDFACTOR_CONST_ALPHA;
1031 alpha_blend_zero = true;
1032 }
1033 }
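         /* Note (based on the CC_STATE handling further down): the constant
          * color/alpha factors substituted above only behave like ZERO
          * because the blend constants are forced to 0.0f when
          * color_blend_zero/alpha_blend_zero are set.
          */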
1034
1035 SET(BLEND_STATE, blend.rts[i].SourceBlendFactor, SourceBlendFactor);
1036 SET(BLEND_STATE, blend.rts[i].DestinationBlendFactor, DestinationBlendFactor);
1037 SET(BLEND_STATE, blend.rts[i].SourceAlphaBlendFactor, SourceAlphaBlendFactor);
1038 SET(BLEND_STATE, blend.rts[i].DestinationAlphaBlendFactor, DestinationAlphaBlendFactor);
1039 }
1040 gfx->color_blend_zero = color_blend_zero;
1041 gfx->alpha_blend_zero = alpha_blend_zero;
1042
1043 SET(BLEND_STATE, blend.IndependentAlphaBlendEnable, independent_alpha_blend);
1044
1045 /* 3DSTATE_PS_BLEND to be consistent with the rest of the
1046 * BLEND_STATE_ENTRY.
1047 */
1048 SET(PS_BLEND, ps_blend.HasWriteableRT, has_writeable_rt);
1049 SET(PS_BLEND, ps_blend.ColorBufferBlendEnable, GET(blend.rts[0].ColorBufferBlendEnable));
1050 SET(PS_BLEND, ps_blend.SourceAlphaBlendFactor, GET(blend.rts[0].SourceAlphaBlendFactor));
1051 SET(PS_BLEND, ps_blend.DestinationAlphaBlendFactor, gfx->alpha_blend_zero ?
1052 BLENDFACTOR_CONST_COLOR :
1053 GET(blend.rts[0].DestinationAlphaBlendFactor));
1054 SET(PS_BLEND, ps_blend.SourceBlendFactor, GET(blend.rts[0].SourceBlendFactor));
1055 SET(PS_BLEND, ps_blend.DestinationBlendFactor, gfx->color_blend_zero ?
1056 BLENDFACTOR_CONST_COLOR :
1057 GET(blend.rts[0].DestinationBlendFactor));
1058 SET(PS_BLEND, ps_blend.AlphaTestEnable, false);
1059 SET(PS_BLEND, ps_blend.IndependentAlphaBlendEnable, GET(blend.IndependentAlphaBlendEnable));
1060 SET(PS_BLEND, ps_blend.AlphaToCoverageEnable, dyn->ms.alpha_to_coverage_enable);
1061 }
1062
1063 if (BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_CB_BLEND_CONSTANTS)) {
1064 SET(CC_STATE, cc.BlendConstantColorRed,
1065 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[0]);
1066 SET(CC_STATE, cc.BlendConstantColorGreen,
1067 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[1]);
1068 SET(CC_STATE, cc.BlendConstantColorBlue,
1069 gfx->color_blend_zero ? 0.0f : dyn->cb.blend_constants[2]);
1070 SET(CC_STATE, cc.BlendConstantColorAlpha,
1071 gfx->alpha_blend_zero ? 0.0f : dyn->cb.blend_constants[3]);
1072 }
1073
1074 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1075 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS) ||
1076 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1077 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_RS_DEPTH_CLAMP_ENABLE) ||
1078 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_DEPTH_CLIP_NEGATIVE_ONE_TO_ONE)) {
1079 struct anv_instance *instance = cmd_buffer->device->physical->instance;
1080 const VkViewport *viewports = dyn->vp.viewports;
1081
1082 const float scale = dyn->vp.depth_clip_negative_one_to_one ? 0.5f : 1.0f;
1083
1084 for (uint32_t i = 0; i < dyn->vp.viewport_count; i++) {
1085 const VkViewport *vp = &viewports[i];
1086
1087          /* The gfx7 state struct has just the matrix and guardband fields;
1088           * the gfx8 struct adds the min/max viewport fields. */
1089 struct GENX(SF_CLIP_VIEWPORT) sfv = {
1090 .ViewportMatrixElementm00 = vp->width / 2,
1091 .ViewportMatrixElementm11 = vp->height / 2,
1092 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) * scale,
1093 .ViewportMatrixElementm30 = vp->x + vp->width / 2,
1094 .ViewportMatrixElementm31 = vp->y + vp->height / 2,
1095 .ViewportMatrixElementm32 = dyn->vp.depth_clip_negative_one_to_one ?
1096 (vp->minDepth + vp->maxDepth) * scale : vp->minDepth,
1097 .XMinClipGuardband = -1.0f,
1098 .XMaxClipGuardband = 1.0f,
1099 .YMinClipGuardband = -1.0f,
1100 .YMaxClipGuardband = 1.0f,
1101 .XMinViewPort = vp->x,
1102 .XMaxViewPort = vp->x + vp->width - 1,
1103 .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
1104 .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
1105 };
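         /* Illustration with assumed values: a viewport of x=0, y=0,
          * width=1920, height=1080, minDepth=0.0, maxDepth=1.0 without
          * negative-one-to-one depth gives m00=960, m11=540, m22=1, m30=960,
          * m31=540, m32=0 and a viewport rectangle of [0,1919]x[0,1079].
          */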
1106
1107 /* Fix depth test misrenderings by lowering translated depth range */
1108 if (instance->lower_depth_range_rate != 1.0f)
1109 sfv.ViewportMatrixElementm32 *= instance->lower_depth_range_rate;
1110
1111 const uint32_t fb_size_max = 1 << 14;
1112 uint32_t x_min = 0, x_max = fb_size_max;
1113 uint32_t y_min = 0, y_max = fb_size_max;
1114
1115 /* If we have a valid renderArea, include that */
1116 if (gfx->render_area.extent.width > 0 &&
1117 gfx->render_area.extent.height > 0) {
1118 x_min = MAX2(x_min, gfx->render_area.offset.x);
1119 x_max = MIN2(x_max, gfx->render_area.offset.x +
1120 gfx->render_area.extent.width);
1121 y_min = MAX2(y_min, gfx->render_area.offset.y);
1122 y_max = MIN2(y_max, gfx->render_area.offset.y +
1123 gfx->render_area.extent.height);
1124 }
1125
1126 /* The client is required to have enough scissors for whatever it
1127 * sets as ViewportIndex but it's possible that they've got more
1128 * viewports set from a previous command. Also, from the Vulkan
1129 * 1.3.207:
1130 *
1131 * "The application must ensure (using scissor if necessary) that
1132 * all rendering is contained within the render area."
1133 *
1134 * If the client doesn't set a scissor, that basically means it
1135 * guarantees everything is in-bounds already. If we end up using a
1136 * guardband of [-1, 1] in that case, there shouldn't be much loss.
1137 * It's theoretically possible that they could do all their clipping
1138 * with clip planes but that'd be a bit odd.
1139 */
1140 if (i < dyn->vp.scissor_count) {
1141 const VkRect2D *scissor = &dyn->vp.scissors[i];
1142 x_min = MAX2(x_min, scissor->offset.x);
1143 x_max = MIN2(x_max, scissor->offset.x + scissor->extent.width);
1144 y_min = MAX2(y_min, scissor->offset.y);
1145 y_max = MIN2(y_max, scissor->offset.y + scissor->extent.height);
1146 }
1147
1148 /* Only bother calculating the guardband if our known render area is
1149 * less than the maximum size. Otherwise, it will calculate [-1, 1]
1150 * anyway but possibly with precision loss.
1151 */
1152 if (x_min > 0 || x_max < fb_size_max ||
1153 y_min > 0 || y_max < fb_size_max) {
1154 intel_calculate_guardband_size(x_min, x_max, y_min, y_max,
1155 sfv.ViewportMatrixElementm00,
1156 sfv.ViewportMatrixElementm11,
1157 sfv.ViewportMatrixElementm30,
1158 sfv.ViewportMatrixElementm31,
1159 &sfv.XMinClipGuardband,
1160 &sfv.XMaxClipGuardband,
1161 &sfv.YMinClipGuardband,
1162 &sfv.YMaxClipGuardband);
1163 }
1164
1165 #define SET_VP(bit, state, field) \
1166 do { \
1167 if (hw_state->state.field != sfv.field) { \
1168 hw_state->state.field = sfv.field; \
1169 BITSET_SET(hw_state->dirty, \
1170 ANV_GFX_STATE_##bit); \
1171 } \
1172 } while (0)
1173 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm00);
1174 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm11);
1175 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm22);
1176 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm30);
1177 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm31);
1178 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], ViewportMatrixElementm32);
1179 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinClipGuardband);
1180 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxClipGuardband);
1181 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinClipGuardband);
1182 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxClipGuardband);
1183 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMinViewPort);
1184 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], XMaxViewPort);
1185 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMinViewPort);
1186 SET_VP(VIEWPORT_SF_CLIP, vp_sf_clip.elem[i], YMaxViewPort);
1187 #undef SET_VP
1188
1189 const bool depth_range_unrestricted =
1190 cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;
1191
1192 float min_depth_limit = depth_range_unrestricted ? -FLT_MAX : 0.0;
1193 float max_depth_limit = depth_range_unrestricted ? FLT_MAX : 1.0;
1194
1195 float min_depth = dyn->rs.depth_clamp_enable ?
1196 MIN2(vp->minDepth, vp->maxDepth) : min_depth_limit;
1197 float max_depth = dyn->rs.depth_clamp_enable ?
1198 MAX2(vp->minDepth, vp->maxDepth) : max_depth_limit;
1199
1200 SET(VIEWPORT_CC, vp_cc.elem[i].MinimumDepth, min_depth);
1201 SET(VIEWPORT_CC, vp_cc.elem[i].MaximumDepth, max_depth);
1202
1203 SET(CLIP, clip.MaximumVPIndex, dyn->vp.viewport_count > 0 ?
1204 dyn->vp.viewport_count - 1 : 0);
1205 }
1206
1207 /* If the HW state is already considered dirty or the previous
1208 * programmed viewport count is smaller than what we need, update the
1209     * viewport count and ensure the HW state is dirty. Otherwise, if the
1210     * number of viewports programmed previously was larger than what we need
1211     * now, there is no need to re-emit; we can keep the old programmed values.
1212 */
1213    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP) ||
1214 hw_state->vp_sf_clip.count < dyn->vp.viewport_count) {
1215 hw_state->vp_sf_clip.count = dyn->vp.viewport_count;
1216 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP);
1217 }
1218    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) ||
1219 hw_state->vp_cc.count < dyn->vp.viewport_count) {
1220 hw_state->vp_cc.count = dyn->vp.viewport_count;
1221 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC);
1222 }
1223 }
1224
1225 if ((gfx->dirty & ANV_CMD_DIRTY_RENDER_AREA) ||
1226 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_SCISSORS) ||
1227 BITSET_TEST(dyn->dirty, MESA_VK_DYNAMIC_VP_VIEWPORTS)) {
1228 const VkRect2D *scissors = dyn->vp.scissors;
1229 const VkViewport *viewports = dyn->vp.viewports;
1230
1231 for (uint32_t i = 0; i < dyn->vp.scissor_count; i++) {
1232 const VkRect2D *s = &scissors[i];
1233 const VkViewport *vp = &viewports[i];
1234
1235 const int max = 0xffff;
1236
1237 uint32_t y_min = MAX2(s->offset.y, MIN2(vp->y, vp->y + vp->height));
1238 uint32_t x_min = MAX2(s->offset.x, vp->x);
1239 int64_t y_max = MIN2(s->offset.y + s->extent.height - 1,
1240 MAX2(vp->y, vp->y + vp->height) - 1);
1241 int64_t x_max = MIN2(s->offset.x + s->extent.width - 1,
1242 vp->x + vp->width - 1);
1243
1244 y_max = CLAMP(y_max, 0, INT16_MAX >> 1);
1245 x_max = CLAMP(x_max, 0, INT16_MAX >> 1);
1246
1247 /* Do this math using int64_t so overflow gets clamped correctly. */
1248 if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
1249 y_min = CLAMP((uint64_t) y_min, gfx->render_area.offset.y, max);
1250 x_min = CLAMP((uint64_t) x_min, gfx->render_area.offset.x, max);
1251 y_max = CLAMP((uint64_t) y_max, 0,
1252 gfx->render_area.offset.y +
1253 gfx->render_area.extent.height - 1);
1254 x_max = CLAMP((uint64_t) x_max, 0,
1255 gfx->render_area.offset.x +
1256 gfx->render_area.extent.width - 1);
1257 }
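         /* Illustration with assumed values: a scissor of (16,16)+(256x256),
          * a 1920x1080 viewport at (0,0) and a 1920x1080 render area at (0,0)
          * yield x_min = y_min = 16 and x_max = y_max = 271 (inclusive).
          */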
1258
1259 if (s->extent.width <= 0 || s->extent.height <= 0) {
1260 /* Since xmax and ymax are inclusive, we have to have xmax < xmin
1261 * or ymax < ymin for empty clips. In case clip x, y, width height
1262 * are all 0, the clamps below produce 0 for xmin, ymin, xmax,
1263 * ymax, which isn't what we want. Just special case empty clips
1264 * and produce a canonical empty clip.
1265 */
1266 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, 1);
1267 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, 1);
1268 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, 0);
1269 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, 0);
1270 } else {
1271 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMin, y_min);
1272 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMin, x_min);
1273 SET(SCISSOR, scissor.elem[i].ScissorRectangleYMax, y_max);
1274 SET(SCISSOR, scissor.elem[i].ScissorRectangleXMax, x_max);
1275 }
1276 }
1277
1278    /* If the HW state is already considered dirty or the previous
1279     * programmed scissor count is smaller than what we need, update the
1280     * scissor count and ensure the HW state is dirty. Otherwise, if the
1281     * number of scissors programmed previously was larger than what we need
1282     * now, there is no need to re-emit; we can keep the old programmed values.
1283     */
1284    if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR) ||
1285 hw_state->scissor.count < dyn->vp.scissor_count) {
1286 hw_state->scissor.count = dyn->vp.scissor_count;
1287 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_SCISSOR);
1288 }
1289 }
1290
1291 #if GFX_VERx10 == 125
1292 if ((cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_RENDER_TARGETS)) {
1293 unsigned fb_width, fb_height, tile_width, tile_height;
1294
1295 if (cmd_buffer->device->physical->instance->enable_tbimr &&
1296 calculate_render_area(cmd_buffer, &fb_width, &fb_height) &&
1297 calculate_tile_dimensions(cmd_buffer, fb_width, fb_height,
1298 &tile_width, &tile_height)) {
1299 /* Use a batch size of 128 polygons per slice as recommended
1300 * by BSpec 68436 "TBIMR Programming".
1301 */
1302 const unsigned num_slices = cmd_buffer->device->info->num_slices;
1303 const unsigned batch_size = DIV_ROUND_UP(num_slices, 2) * 256;
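         /* Example with an assumed slice count: num_slices = 8 gives
          * batch_size = 4 * 256 = 1024 polygons, encoded below as
          * log2(1024) - 5 = 5.
          */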
1304
1305 SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleHeight, tile_height);
1306 SET(TBIMR_TILE_PASS_INFO, tbimr.TileRectangleWidth, tile_width);
1307 SET(TBIMR_TILE_PASS_INFO, tbimr.VerticalTileCount,
1308 DIV_ROUND_UP(fb_height, tile_height));
1309 SET(TBIMR_TILE_PASS_INFO, tbimr.HorizontalTileCount,
1310 DIV_ROUND_UP(fb_width, tile_width));
1311 SET(TBIMR_TILE_PASS_INFO, tbimr.TBIMRBatchSize,
1312 util_logbase2(batch_size) - 5);
1313 SET(TBIMR_TILE_PASS_INFO, tbimr.TileBoxCheck, true);
1314 SET(TBIMR_TILE_PASS_INFO, use_tbimr, true);
1315 } else {
1316 hw_state->use_tbimr = false;
1317 }
1318 }
1319 #endif
1320
1321 #undef GET
1322 #undef SET
1323 #undef SET_STAGE
1324
1325 vk_dynamic_graphics_state_clear_dirty(&cmd_buffer->vk.dynamic_graphics_state);
1326 }
1327
1328 static void
1329 emit_wa_18020335297_dummy_draw(struct anv_cmd_buffer *cmd_buffer)
1330 {
1331 #if GFX_VERx10 >= 125
1332 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VFG), vfg) {
1333 vfg.DistributionMode = RR_STRICT;
1334 }
1335 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
1336 vf.GeometryDistributionEnable = true;
1337 }
1338 #endif
1339
1340 #if GFX_VER >= 12
1341 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PRIMITIVE_REPLICATION), pr) {
1342 pr.ReplicaMask = 1;
1343 }
1344 #endif
1345
1346 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_RASTER), rr) {
1347 rr.CullMode = CULLMODE_NONE;
1348 rr.FrontFaceFillMode = FILL_MODE_SOLID;
1349 rr.BackFaceFillMode = FILL_MODE_SOLID;
1350 }
1351
1352 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_STATISTICS), zero);
1353 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS), zero);
1354
1355 #if GFX_VER >= 11
1356 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_SGVS_2), zero);
1357 #endif
1358
1359 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CLIP), clip) {
1360 clip.ClipEnable = true;
1361 clip.ClipMode = CLIPMODE_REJECT_ALL;
1362 }
1363
1364 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VS), zero);
1365 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_GS), zero);
1366 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_HS), zero);
1367 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TE), zero);
1368 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DS), zero);
1369 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), zero);
1370
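/* Two dummy vertex elements: allocate 1 header dword plus
* GENX(VERTEX_ELEMENT_STATE_length) (2 dwords) for each of the two
* elements packed in the loop below, hence 1 + 2 * 2 dwords.
*/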
1371 uint32_t *vertex_elements = anv_batch_emitn(&cmd_buffer->batch, 1 + 2 * 2,
1372 GENX(3DSTATE_VERTEX_ELEMENTS));
1373 uint32_t *ve_pack_dest = &vertex_elements[1];
1374
1375 for (int i = 0; i < 2; i++) {
1376 struct GENX(VERTEX_ELEMENT_STATE) element = {
1377 .Valid = true,
1378 .SourceElementFormat = ISL_FORMAT_R32G32B32A32_FLOAT,
1379 .Component0Control = VFCOMP_STORE_0,
1380 .Component1Control = VFCOMP_STORE_0,
1381 .Component2Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
1382 .Component3Control = i == 0 ? VFCOMP_STORE_0 : VFCOMP_STORE_1_FP,
1383 };
1384 GENX(VERTEX_ELEMENT_STATE_pack)(NULL, ve_pack_dest, &element);
1385 ve_pack_dest += GENX(VERTEX_ELEMENT_STATE_length);
1386 }
1387
1388 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), topo) {
1389 topo.PrimitiveTopologyType = _3DPRIM_TRILIST;
1390 }
1391
1392 /* Emit dummy draw per slice. */
1393 for (unsigned i = 0; i < cmd_buffer->device->info->num_slices; i++) {
1394 anv_batch_emit(&cmd_buffer->batch, GENX(3DPRIMITIVE), prim) {
1395 prim.VertexCountPerInstance = 3;
1396 prim.PrimitiveTopologyType = _3DPRIM_TRILIST;
1397 prim.InstanceCount = 1;
1398 prim.VertexAccessType = SEQUENTIAL;
1399 }
1400 }
1401 }
1402 /**
1403 * This function handles dirty state emission to the batch buffer.
1404 */
1405 static void
1406 cmd_buffer_gfx_state_emission(struct anv_cmd_buffer *cmd_buffer)
1407 {
1408 struct anv_device *device = cmd_buffer->device;
1409 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
1410 struct anv_graphics_pipeline *pipeline =
1411 anv_pipeline_to_graphics(gfx->base.pipeline);
1412 const struct vk_dynamic_graphics_state *dyn =
1413 &cmd_buffer->vk.dynamic_graphics_state;
1414 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
1415
1416 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_URB)) {
1417 genX(urb_workaround)(cmd_buffer, &pipeline->urb_cfg);
1418
1419 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.urb);
1420
1421 memcpy(&gfx->urb_cfg, &pipeline->urb_cfg,
1422 sizeof(struct intel_urb_config));
1423 }
1424
1425 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE))
1426 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ms);
1427
1428 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION))
1429 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.primitive_replication);
1430
1431 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_INSTANCING))
1432 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_instancing);
1433
1434 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS))
1435 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs);
1436
1437 #if GFX_VER >= 11
1438 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2))
1439 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_sgvs_2);
1440 #endif
1441
1442 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VS))
1443 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vs);
1444
1445 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_HS))
1446 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.hs);
1447
1448 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DS))
1449 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ds);
1450
1451 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS))
1452 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.vf_statistics);
1453
1454 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE))
1455 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe);
1456
1457 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_SWIZ))
1458 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_swiz);
1459
1460 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
1461 /* Wa_16011773973:
1462 * If SOL is enabled and SO_DECL state has to be programmed,
1463 * 1. Send 3D State SOL state with SOL disabled
1464 * 2. Send SO_DECL NP state
1465 * 3. Send 3D State SOL with SOL Enabled
1466 */
1467 if (intel_needs_workaround(device->info, 16011773973) &&
1468 pipeline->uses_xfb)
1469 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT), so);
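/* The empty 3DSTATE_STREAMOUT above is step 1 (SOL disabled); step 3 is
* handled by re-dirtying ANV_GFX_STATE_STREAMOUT in
* genX(cmd_buffer_flush_gfx_hw_state) when this workaround applies.
*/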
1470
1471 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
1472 final.so_decl_list);
1473
1474 #if GFX_VER >= 11
1475 /* ICL PRMs, Volume 2a - Command Reference: Instructions,
1476 * 3DSTATE_SO_DECL_LIST:
1477 *
1478 * "Workaround: This command must be followed by a PIPE_CONTROL with
1479 * CS Stall bit set."
1480 *
1481 * On DG2+ also known as Wa_1509820217.
1482 */
1483 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
1484 cmd_buffer->state.current_pipeline,
1485 ANV_PIPE_CS_STALL_BIT);
1486 #endif
1487 }
1488
1489 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS))
1490 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.ps);
1491
1492 if (device->vk.enabled_extensions.EXT_mesh_shader) {
1493 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL))
1494 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_control);
1495
1496 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER))
1497 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_shader);
1498
1499 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB))
1500 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.mesh_distrib);
1501
1502 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL))
1503 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_control);
1504
1505 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER))
1506 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_shader);
1507
1508 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB))
1509 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.task_redistrib);
1510
1511 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH))
1512 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.sbe_mesh);
1513
1514 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH))
1515 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline, final.clip_mesh);
1516 } else {
1517 assert(!BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_CONTROL) &&
1518 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_SHADER) &&
1519 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_MESH_DISTRIB) &&
1520 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_CONTROL) &&
1521 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_SHADER) &&
1522 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TASK_REDISTRIB) &&
1523 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP_MESH) &&
1524 !BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SBE_MESH));
1525 }
1526
1527 #define INIT(category, name) \
1528 .name = hw_state->category.name
1529 #define SET(s, category, name) \
1530 s.name = hw_state->category.name
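/* For instance, SET(clip, clip, APIMode); expands to
*    clip.APIMode = hw_state->clip.APIMode;
* and INIT(vp_cc.elem[i], MinimumDepth) expands to
*    .MinimumDepth = hw_state->vp_cc.elem[i].MinimumDepth,
*/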
1531
1532 /* Now the potentially dynamic instructions */
1533
1534 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_EXTRA)) {
1535 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_PS_EXTRA),
1536 pipeline, partial.ps_extra, pse) {
1537 SET(pse, ps_extra, PixelShaderKillsPixel);
1538 }
1539 }
1540
1541 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CLIP)) {
1542 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_CLIP),
1543 pipeline, partial.clip, clip) {
1544 SET(clip, clip, APIMode);
1545 SET(clip, clip, ViewportXYClipTestEnable);
1546 SET(clip, clip, TriangleStripListProvokingVertexSelect);
1547 SET(clip, clip, LineStripListProvokingVertexSelect);
1548 SET(clip, clip, TriangleFanProvokingVertexSelect);
1549 SET(clip, clip, MaximumVPIndex);
1550 }
1551 }
1552
1553 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_STREAMOUT)) {
1554 genX(streamout_prologue)(cmd_buffer);
1555
1556 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_STREAMOUT),
1557 pipeline, partial.so, so) {
1558 SET(so, so, RenderingDisable);
1559 SET(so, so, RenderStreamSelect);
1560 SET(so, so, ReorderMode);
1561 SET(so, so, ForceRendering);
1562 }
1563 }
1564
1565 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_SF_CLIP)) {
1566 struct anv_state sf_clip_state =
1567 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1568 hw_state->vp_sf_clip.count * 64, 64);
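/* Each SF_CLIP_VIEWPORT entry is packed into 64 bytes, hence the 64-byte
* per-viewport stride used for the allocation above and the pack below.
*/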
1569
1570 for (uint32_t i = 0; i < hw_state->vp_sf_clip.count; i++) {
1571 struct GENX(SF_CLIP_VIEWPORT) sfv = {
1572 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm00),
1573 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm11),
1574 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm22),
1575 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm30),
1576 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm31),
1577 INIT(vp_sf_clip.elem[i], ViewportMatrixElementm32),
1578 INIT(vp_sf_clip.elem[i], XMinClipGuardband),
1579 INIT(vp_sf_clip.elem[i], XMaxClipGuardband),
1580 INIT(vp_sf_clip.elem[i], YMinClipGuardband),
1581 INIT(vp_sf_clip.elem[i], YMaxClipGuardband),
1582 INIT(vp_sf_clip.elem[i], XMinViewPort),
1583 INIT(vp_sf_clip.elem[i], XMaxViewPort),
1584 INIT(vp_sf_clip.elem[i], YMinViewPort),
1585 INIT(vp_sf_clip.elem[i], YMaxViewPort),
1586 };
1587 GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64, &sfv);
1588 }
1589
1590 anv_batch_emit(&cmd_buffer->batch,
1591 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
1592 clip.SFClipViewportPointer = sf_clip_state.offset;
1593 }
1594 }
1595
1596 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC)) {
1597 struct anv_state cc_state =
1598 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1599 hw_state->vp_cc.count * 8, 32);
1600
1601 for (uint32_t i = 0; i < hw_state->vp_cc.count; i++) {
1602 struct GENX(CC_VIEWPORT) cc_viewport = {
1603 INIT(vp_cc.elem[i], MinimumDepth),
1604 INIT(vp_cc.elem[i], MaximumDepth),
1605 };
1606 GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
1607 }
1608
1609 anv_batch_emit(&cmd_buffer->batch,
1610 GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
1611 cc.CCViewportPointer = cc_state.offset;
1612 }
1613 cmd_buffer->state.gfx.viewport_set = true;
1614 }
1615
1616 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SCISSOR)) {
1617 /* Wa_1409725701:
1618 *
1619 * "The viewport-specific state used by the SF unit (SCISSOR_RECT) is
1620 * stored as an array of up to 16 elements. The location of first
1621 * element of the array, as specified by Pointer to SCISSOR_RECT,
1622 * should be aligned to a 64-byte boundary."
1623 */
1624 struct anv_state scissor_state =
1625 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1626 hw_state->scissor.count * 8, 64);
1627
1628 for (uint32_t i = 0; i < hw_state->scissor.count; i++) {
1629 struct GENX(SCISSOR_RECT) scissor = {
1630 INIT(scissor.elem[i], ScissorRectangleYMin),
1631 INIT(scissor.elem[i], ScissorRectangleXMin),
1632 INIT(scissor.elem[i], ScissorRectangleYMax),
1633 INIT(scissor.elem[i], ScissorRectangleXMax),
1634 };
1635 GENX(SCISSOR_RECT_pack)(NULL, scissor_state.map + i * 8, &scissor);
1636 }
1637
1638 anv_batch_emit(&cmd_buffer->batch,
1639 GENX(3DSTATE_SCISSOR_STATE_POINTERS), ssp) {
1640 ssp.ScissorRectPointer = scissor_state.offset;
1641 }
1642 }
1643
1644 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY)) {
1645 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF_TOPOLOGY), vft) {
1646 SET(vft, vft, PrimitiveTopologyType);
1647 }
1648 }
1649
1650 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT)) {
1651 const uint32_t ve_count =
1652 pipeline->vs_input_elements + pipeline->svgs_count;
1653 const uint32_t num_dwords = 1 + 2 * MAX2(1, ve_count);
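/* 1 header dword plus 2 dwords per VERTEX_ELEMENT_STATE; MAX2 reserves
* room for at least one element so the empty_vs_input copy below fits
* when ve_count == 0.
*/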
1654 uint32_t *p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
1655 GENX(3DSTATE_VERTEX_ELEMENTS));
1656
1657 if (p) {
1658 if (ve_count == 0) {
1659 memcpy(p + 1, cmd_buffer->device->empty_vs_input,
1660 sizeof(cmd_buffer->device->empty_vs_input));
1661 } else if (ve_count == pipeline->vertex_input_elems) {
1662 /* MESA_VK_DYNAMIC_VI is not dynamic for this pipeline, so
1663 * everything is in pipeline->vertex_input_data and we can just
1664 * memcpy
1665 */
1666 memcpy(p + 1, pipeline->vertex_input_data, 4 * 2 * ve_count);
1667 anv_batch_emit_pipeline_state(&cmd_buffer->batch, pipeline,
1668 final.vf_instancing);
1669 } else {
1670 assert(pipeline->final.vf_instancing.len == 0);
1671 /* Use dyn->vi to emit the dynamic VERTEX_ELEMENT_STATE input. */
1672 genX(emit_vertex_input)(&cmd_buffer->batch, p + 1,
1673 pipeline, dyn->vi, false /* emit_in_pipeline */);
1674 /* Then append the VERTEX_ELEMENT_STATE for the draw parameters */
1675 memcpy(p + 1 + 2 * pipeline->vs_input_elements,
1676 pipeline->vertex_input_data,
1677 4 * 2 * pipeline->vertex_input_elems);
1678 }
1679 }
1680 }
1681
1682 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TE)) {
1683 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_TE),
1684 pipeline, partial.te, te) {
1685 SET(te, te, OutputTopology);
1686 }
1687 }
1688
1689 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_GS)) {
1690 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_GS),
1691 pipeline, partial.gs, gs) {
1692 SET(gs, gs, ReorderMode);
1693 }
1694 }
1695
1696 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CPS)) {
1697 #if GFX_VER == 11
1698 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS), cps) {
1699 SET(cps, cps, CoarsePixelShadingMode);
1700 SET(cps, cps, MinCPSizeX);
1701 SET(cps, cps, MinCPSizeY);
1702 }
1703 #elif GFX_VER >= 12
1704 /* TODO: we can optimize this flush in the following cases:
1705 *
1706 * In the case where the last geometry shader emits a value that is
1707 * not constant, we can avoid this stall because we can synchronize
1708 * the pixel shader internally with
1709 * 3DSTATE_PS::EnablePSDependencyOnCPsizeChange.
1710 *
1711 * If we know that the previous pipeline and the current one are
1712 * using the same fragment shading rate.
1713 */
1714 anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
1715 #if GFX_VERx10 >= 125
1716 pc.PSSStallSyncEnable = true;
1717 #else
1718 pc.PSDSyncEnable = true;
1719 #endif
1720 }
1721
1722 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CPS_POINTERS), cps) {
1723 SET(cps, cps, CoarsePixelShadingStateArrayPointer);
1724 }
1725 #endif
1726 }
1727
1728 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SF)) {
1729 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_SF),
1730 pipeline, partial.sf, sf) {
1731 SET(sf, sf, LineWidth);
1732 SET(sf, sf, TriangleStripListProvokingVertexSelect);
1733 SET(sf, sf, LineStripListProvokingVertexSelect);
1734 SET(sf, sf, TriangleFanProvokingVertexSelect);
1735 SET(sf, sf, LegacyGlobalDepthBiasEnable);
1736 }
1737 }
1738
1739 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_RASTER)) {
1740 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_RASTER),
1741 pipeline, partial.raster, raster) {
1742 SET(raster, raster, APIMode);
1743 SET(raster, raster, DXMultisampleRasterizationEnable);
1744 SET(raster, raster, AntialiasingEnable);
1745 SET(raster, raster, CullMode);
1746 SET(raster, raster, FrontWinding);
1747 SET(raster, raster, GlobalDepthOffsetEnableSolid);
1748 SET(raster, raster, GlobalDepthOffsetEnableWireframe);
1749 SET(raster, raster, GlobalDepthOffsetEnablePoint);
1750 SET(raster, raster, GlobalDepthOffsetConstant);
1751 SET(raster, raster, GlobalDepthOffsetScale);
1752 SET(raster, raster, GlobalDepthOffsetClamp);
1753 SET(raster, raster, FrontFaceFillMode);
1754 SET(raster, raster, BackFaceFillMode);
1755 SET(raster, raster, ViewportZFarClipTestEnable);
1756 SET(raster, raster, ViewportZNearClipTestEnable);
1757 SET(raster, raster, ConservativeRasterizationEnable);
1758 }
1759 }
1760
1761 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_CC_STATE)) {
1762 struct anv_state cc_state =
1763 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1764 GENX(COLOR_CALC_STATE_length) * 4,
1765 64);
1766 struct GENX(COLOR_CALC_STATE) cc = {
1767 INIT(cc, BlendConstantColorRed),
1768 INIT(cc, BlendConstantColorGreen),
1769 INIT(cc, BlendConstantColorBlue),
1770 INIT(cc, BlendConstantColorAlpha),
1771 };
1772 GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
1773
1774 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
1775 ccp.ColorCalcStatePointer = cc_state.offset;
1776 ccp.ColorCalcStatePointerValid = true;
1777 }
1778 }
1779
1780 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_MASK)) {
1781 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_SAMPLE_MASK), sm) {
1782 SET(sm, sm, SampleMask);
1783 }
1784 }
1785
1786 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM_DEPTH_STENCIL)) {
1787 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_WM_DEPTH_STENCIL), ds) {
1788 SET(ds, ds, DoubleSidedStencilEnable);
1789 SET(ds, ds, StencilTestMask);
1790 SET(ds, ds, StencilWriteMask);
1791 SET(ds, ds, BackfaceStencilTestMask);
1792 SET(ds, ds, BackfaceStencilWriteMask);
1793 SET(ds, ds, StencilReferenceValue);
1794 SET(ds, ds, BackfaceStencilReferenceValue);
1795 SET(ds, ds, DepthTestEnable);
1796 SET(ds, ds, DepthBufferWriteEnable);
1797 SET(ds, ds, DepthTestFunction);
1798 SET(ds, ds, StencilTestEnable);
1799 SET(ds, ds, StencilBufferWriteEnable);
1800 SET(ds, ds, StencilFailOp);
1801 SET(ds, ds, StencilPassDepthPassOp);
1802 SET(ds, ds, StencilPassDepthFailOp);
1803 SET(ds, ds, StencilTestFunction);
1804 SET(ds, ds, BackfaceStencilFailOp);
1805 SET(ds, ds, BackfaceStencilPassDepthPassOp);
1806 SET(ds, ds, BackfaceStencilPassDepthFailOp);
1807 SET(ds, ds, BackfaceStencilTestFunction);
1808 }
1809 }
1810
1811 #if GFX_VER >= 12
1812 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_DEPTH_BOUNDS)) {
1813 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_DEPTH_BOUNDS), db) {
1814 SET(db, db, DepthBoundsTestEnable);
1815 SET(db, db, DepthBoundsTestMinValue);
1816 SET(db, db, DepthBoundsTestMaxValue);
1817 }
1818 }
1819 #endif
1820
1821 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_LINE_STIPPLE)) {
1822 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_LINE_STIPPLE), ls) {
1823 SET(ls, ls, LineStipplePattern);
1824 SET(ls, ls, LineStippleInverseRepeatCount);
1825 SET(ls, ls, LineStippleRepeatCount);
1826 }
1827 #if GFX_VER >= 11
1828 /* ICL PRMs, Volume 2a - Command Reference: Instructions,
1829 * 3DSTATE_LINE_STIPPLE:
1830 *
1831 * "Workaround: This command must be followed by a PIPE_CONTROL with
1832 * CS Stall bit set."
1833 */
1834 genx_batch_emit_pipe_control(&cmd_buffer->batch,
1835 cmd_buffer->device->info,
1836 cmd_buffer->state.current_pipeline,
1837 ANV_PIPE_CS_STALL_BIT);
1838 #endif
1839 }
1840
1841 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VF)) {
1842 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
1843 #if GFX_VERx10 >= 125
1844 vf.GeometryDistributionEnable = true;
1845 #endif
1846 SET(vf, vf, IndexedDrawCutIndexEnable);
1847 SET(vf, vf, CutIndex);
1848 }
1849 }
1850
1851 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_INDEX_BUFFER)) {
1852 struct anv_buffer *buffer = gfx->index_buffer;
1853 uint32_t offset = gfx->index_offset;
1854 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
1855 ib.IndexFormat = gfx->index_type;
1856 ib.MOCS = anv_mocs(cmd_buffer->device,
1857 buffer ? buffer->address.bo : NULL,
1858 ISL_SURF_USAGE_INDEX_BUFFER_BIT);
1859 #if GFX_VER >= 12
1860 ib.L3BypassDisable = true;
1861 #endif
1862 if (buffer) {
1863 ib.BufferStartingAddress = anv_address_add(buffer->address, offset);
1864 ib.BufferSize = gfx->index_size;
1865 }
1866 }
1867 }
1868
1869 #if GFX_VERx10 >= 125
1870 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VFG)) {
1871 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_VFG),
1872 pipeline, partial.vfg, vfg) {
1873 SET(vfg, vfg, ListCutIndexEnable);
1874 }
1875 }
1876 #endif
1877
1878 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SAMPLE_PATTERN)) {
1879 genX(emit_sample_pattern)(&cmd_buffer->batch,
1880 dyn->ms.sample_locations_enable ?
1881 dyn->ms.sample_locations : NULL);
1882 }
1883
1884 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM)) {
1885 anv_batch_emit_merge(&cmd_buffer->batch, GENX(3DSTATE_WM),
1886 pipeline, partial.wm, wm) {
1887 SET(wm, wm, ForceThreadDispatchEnable);
1888 SET(wm, wm, LineStippleEnable);
1889 }
1890 }
1891
1892 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PS_BLEND)) {
1893 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_PS_BLEND), blend) {
1894 SET(blend, ps_blend, HasWriteableRT);
1895 SET(blend, ps_blend, ColorBufferBlendEnable);
1896 SET(blend, ps_blend, SourceAlphaBlendFactor);
1897 SET(blend, ps_blend, DestinationAlphaBlendFactor);
1898 SET(blend, ps_blend, SourceBlendFactor);
1899 SET(blend, ps_blend, DestinationBlendFactor);
1900 SET(blend, ps_blend, AlphaTestEnable);
1901 SET(blend, ps_blend, IndependentAlphaBlendEnable);
1902 SET(blend, ps_blend, AlphaToCoverageEnable);
1903 }
1904 }
1905
1906 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE)) {
1907 const uint32_t num_dwords = GENX(BLEND_STATE_length) +
1908 GENX(BLEND_STATE_ENTRY_length) * MAX_RTS;
1909 struct anv_state blend_states =
1910 anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
1911 num_dwords * 4,
1912 64);
1913
1914 uint32_t *dws = blend_states.map;
1915
1916 struct GENX(BLEND_STATE) blend_state = {
1917 INIT(blend, AlphaToCoverageEnable),
1918 INIT(blend, AlphaToOneEnable),
1919 INIT(blend, IndependentAlphaBlendEnable),
1920 };
1921 GENX(BLEND_STATE_pack)(NULL, blend_states.map, &blend_state);
1922
1923 /* Jump to blend entries. */
1924 dws += GENX(BLEND_STATE_length);
1925 for (uint32_t i = 0; i < MAX_RTS; i++) {
1926 struct GENX(BLEND_STATE_ENTRY) entry = {
1927 INIT(blend.rts[i], WriteDisableAlpha),
1928 INIT(blend.rts[i], WriteDisableRed),
1929 INIT(blend.rts[i], WriteDisableGreen),
1930 INIT(blend.rts[i], WriteDisableBlue),
1931 INIT(blend.rts[i], LogicOpFunction),
1932 INIT(blend.rts[i], LogicOpEnable),
1933 INIT(blend.rts[i], ColorBufferBlendEnable),
1934 INIT(blend.rts[i], ColorClampRange),
1935 INIT(blend.rts[i], PreBlendColorClampEnable),
1936 INIT(blend.rts[i], PostBlendColorClampEnable),
1937 INIT(blend.rts[i], SourceBlendFactor),
1938 INIT(blend.rts[i], DestinationBlendFactor),
1939 INIT(blend.rts[i], ColorBlendFunction),
1940 INIT(blend.rts[i], SourceAlphaBlendFactor),
1941 INIT(blend.rts[i], DestinationAlphaBlendFactor),
1942 INIT(blend.rts[i], AlphaBlendFunction),
1943 };
1944
1945 GENX(BLEND_STATE_ENTRY_pack)(NULL, dws, &entry);
1946 dws += GENX(BLEND_STATE_ENTRY_length);
1947 }
1948
1949 gfx->blend_states = blend_states;
1950 /* Dirty the pointers to reemit 3DSTATE_BLEND_STATE_POINTERS below */
1951 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS);
1952 }
1953
1954 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_BLEND_STATE_POINTERS)) {
1955 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_BLEND_STATE_POINTERS), bsp) {
1956 bsp.BlendStatePointer = gfx->blend_states.offset;
1957 bsp.BlendStatePointerValid = true;
1958 }
1959 }
1960
1961 #if GFX_VERx10 >= 125
1962 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WA_18019816803)) {
1963 genx_batch_emit_pipe_control(&cmd_buffer->batch, cmd_buffer->device->info,
1964 cmd_buffer->state.current_pipeline,
1965 ANV_PIPE_PSS_STALL_SYNC_BIT);
1966 }
1967 #endif
1968
1969 #if GFX_VER == 9
1970 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_PMA_FIX))
1971 genX(cmd_buffer_enable_pma_fix)(cmd_buffer, hw_state->pma_fix);
1972 #endif
1973
1974 #if GFX_VERx10 >= 125
1975 if (hw_state->use_tbimr &&
1976 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_TBIMR_TILE_PASS_INFO)) {
1977 anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_TBIMR_TILE_PASS_INFO),
1978 tbimr) {
1979 SET(tbimr, tbimr, TileRectangleHeight);
1980 SET(tbimr, tbimr, TileRectangleWidth);
1981 SET(tbimr, tbimr, VerticalTileCount);
1982 SET(tbimr, tbimr, HorizontalTileCount);
1983 SET(tbimr, tbimr, TBIMRBatchSize);
1984 SET(tbimr, tbimr, TileBoxCheck);
1985 }
1986 }
1987 #endif
1988
1989 #undef INIT
1990 #undef SET
1991
1992 BITSET_ZERO(hw_state->dirty);
1993 }
1994
1995 /**
1996 * This function handles possible state workarounds and emits the dirty
1997 * instructions to the batch buffer.
1998 */
1999 void
2000 genX(cmd_buffer_flush_gfx_hw_state)(struct anv_cmd_buffer *cmd_buffer)
2001 {
2002 struct anv_device *device = cmd_buffer->device;
2003 struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
2004 struct anv_graphics_pipeline *pipeline =
2005 anv_pipeline_to_graphics(cmd_buffer->state.gfx.base.pipeline);
2006 struct anv_gfx_dynamic_state *hw_state = &gfx->dyn_state;
2007
2008 if (INTEL_DEBUG(DEBUG_REEMIT)) {
2009 BITSET_OR(gfx->dyn_state.dirty, gfx->dyn_state.dirty,
2010 device->gfx_dirty_state);
2011 }
2012
2013 /**
2014 * Put potential workarounds here if you need to reemit an instruction
2015 * because another one is changing.
2016 */
2017
2018 /* Since Wa_16011773973 will disable 3DSTATE_STREAMOUT, we need to reemit
2019 * it after.
2020 */
2021 if (intel_needs_workaround(device->info, 16011773973) &&
2022 pipeline->uses_xfb &&
2023 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_SO_DECL_LIST)) {
2024 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2025 }
2026
2027 /* Gfx11 undocumented issue:
2028 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/9781
2029 */
2030 #if GFX_VER == 11
2031 if (BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_WM))
2032 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_MULTISAMPLE);
2033 #endif
2034
2035 /* Wa_18020335297 - Apply the WA when viewport ptr is reprogrammed. */
2036 if (intel_needs_workaround(device->info, 18020335297) &&
2037 BITSET_TEST(hw_state->dirty, ANV_GFX_STATE_VIEWPORT_CC) &&
2038 cmd_buffer->state.gfx.viewport_set) {
2039 /* For mesh, we implement the WA using CS stall. This is for
2040 * simplicity and takes care of possible interaction with Wa_16014390852.
2041 */
2042 if (anv_pipeline_is_mesh(pipeline)) {
2043 genx_batch_emit_pipe_control(&cmd_buffer->batch, device->info,
2044 _3D, ANV_PIPE_CS_STALL_BIT);
2045 } else {
2046 /* Mask off all instructions that we program. */
2047 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VFG);
2048 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF);
2049 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
2050 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_RASTER);
2051 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
2052 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
2053 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
2054 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_CLIP);
2055 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2056 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
2057 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
2058
2059 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_VS);
2060 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_GS);
2061 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_HS);
2062 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_TE);
2063 BITSET_CLEAR(hw_state->dirty, ANV_GFX_STATE_DS);
2064
2065 cmd_buffer_gfx_state_emission(cmd_buffer);
2066
2067 emit_wa_18020335297_dummy_draw(cmd_buffer);
2068
2069 /* Dirty all emitted WA state to make sure that the current real
2070 * state is restored.
2071 */
2072 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VFG);
2073 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF);
2074 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_PRIMITIVE_REPLICATION);
2075 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_RASTER);
2076 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_STATISTICS);
2077 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS);
2078 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_SGVS_2);
2079 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_CLIP);
2080 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_STREAMOUT);
2081 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VERTEX_INPUT);
2082 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VF_TOPOLOGY);
2083
2084 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_VS);
2085 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_GS);
2086 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_HS);
2087 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_TE);
2088 BITSET_SET(hw_state->dirty, ANV_GFX_STATE_DS);
2089 }
2090 }
2091
2092 cmd_buffer_gfx_state_emission(cmd_buffer);
2093 }
2094
2095 void
2096 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
2097 {
2098 if (!anv_cmd_buffer_is_render_queue(cmd_buffer))
2099 return;
2100
2101 if (cmd_buffer->state.pma_fix_enabled == enable)
2102 return;
2103
2104 cmd_buffer->state.pma_fix_enabled = enable;
2105
2106 /* According to the Broadwell PIPE_CONTROL documentation, software should
2107 * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
2108 * prior to the LRI. If stencil buffer writes are enabled, then a Render
2109 * Cache Flush is also necessary.
2110 *
2111 * The Skylake docs say to use a depth stall rather than a command
2112 * streamer stall. However, the hardware seems to violently disagree.
2113 * A full command streamer stall seems to be needed in both cases.
2114 */
2115 genx_batch_emit_pipe_control
2116 (&cmd_buffer->batch, cmd_buffer->device->info,
2117 cmd_buffer->state.current_pipeline,
2118 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2119 ANV_PIPE_CS_STALL_BIT |
2120 #if GFX_VER >= 12
2121 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
2122 #endif
2123 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
2124
2125 #if GFX_VER == 9
2126 uint32_t cache_mode;
2127 anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
2128 .STCPMAOptimizationEnable = enable,
2129 .STCPMAOptimizationEnableMask = true);
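/* CACHE_MODE_0 is a masked register: the STCPMAOptimizationEnableMask bit
* set above makes the LRI below update only the STC PMA optimization
* enable, leaving the other CACHE_MODE_0 bits untouched.
*/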
2130 anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
2131 lri.RegisterOffset = GENX(CACHE_MODE_0_num);
2132 lri.DataDWord = cache_mode;
2133 }
2134
2135 #endif /* GFX_VER == 9 */
2136
2137 /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
2138 * Flush bits is often necessary. We do it regardless because it's easier.
2139 * The render cache flush is also necessary if stencil writes are enabled.
2140 *
2141 * Again, the Skylake docs give a different set of flushes but the BDW
2142 * flushes seem to work just as well.
2143 */
2144 genx_batch_emit_pipe_control
2145 (&cmd_buffer->batch, cmd_buffer->device->info,
2146 cmd_buffer->state.current_pipeline,
2147 ANV_PIPE_DEPTH_STALL_BIT |
2148 ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
2149 #if GFX_VER >= 12
2150 ANV_PIPE_TILE_CACHE_FLUSH_BIT |
2151 #endif
2152 ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT);
2153 }
2154