/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"

#include "genxml/gen_macros.h"
#include "genxml/genX_pack.h"

#if GEN_GEN == 8
36 void
gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer * cmd_buffer)37 gen8_cmd_buffer_emit_viewport(struct anv_cmd_buffer *cmd_buffer)
38 {
39    uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
40    const VkViewport *viewports =
41       cmd_buffer->state.gfx.dynamic.viewport.viewports;
42    struct anv_state sf_clip_state =
43       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 64, 64);
44 
45    for (uint32_t i = 0; i < count; i++) {
46       const VkViewport *vp = &viewports[i];
47 
48       /* The gen7 state struct has just the matrix and guardband fields, the
49        * gen8 struct adds the min/max viewport fields. */
50       struct GENX(SF_CLIP_VIEWPORT) sf_clip_viewport = {
51          .ViewportMatrixElementm00 = vp->width / 2,
52          .ViewportMatrixElementm11 = vp->height / 2,
53          .ViewportMatrixElementm22 = vp->maxDepth - vp->minDepth,
54          .ViewportMatrixElementm30 = vp->x + vp->width / 2,
55          .ViewportMatrixElementm31 = vp->y + vp->height / 2,
56          .ViewportMatrixElementm32 = vp->minDepth,
57          .XMinClipGuardband = -1.0f,
58          .XMaxClipGuardband = 1.0f,
59          .YMinClipGuardband = -1.0f,
60          .YMaxClipGuardband = 1.0f,
61          .XMinViewPort = vp->x,
62          .XMaxViewPort = vp->x + vp->width - 1,
63          .YMinViewPort = MIN2(vp->y, vp->y + vp->height),
64          .YMaxViewPort = MAX2(vp->y, vp->y + vp->height) - 1,
65       };
66 
67       GENX(SF_CLIP_VIEWPORT_pack)(NULL, sf_clip_state.map + i * 64,
68                                  &sf_clip_viewport);
69    }
70 
71    anv_state_flush(cmd_buffer->device, sf_clip_state);
72 
73    anv_batch_emit(&cmd_buffer->batch,
74                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), clip) {
75       clip.SFClipViewportPointer = sf_clip_state.offset;
76    }
77 }
78 
79 void
gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer * cmd_buffer,bool depth_clamp_enable)80 gen8_cmd_buffer_emit_depth_viewport(struct anv_cmd_buffer *cmd_buffer,
81                                     bool depth_clamp_enable)
82 {
83    uint32_t count = cmd_buffer->state.gfx.dynamic.viewport.count;
84    const VkViewport *viewports =
85       cmd_buffer->state.gfx.dynamic.viewport.viewports;
86    struct anv_state cc_state =
87       anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, count * 8, 32);
88 
89    for (uint32_t i = 0; i < count; i++) {
90       const VkViewport *vp = &viewports[i];
91 
92       struct GENX(CC_VIEWPORT) cc_viewport = {
93          .MinimumDepth = depth_clamp_enable ? vp->minDepth : 0.0f,
94          .MaximumDepth = depth_clamp_enable ? vp->maxDepth : 1.0f,
95       };
96 
97       GENX(CC_VIEWPORT_pack)(NULL, cc_state.map + i * 8, &cc_viewport);
98    }
99 
100    anv_state_flush(cmd_buffer->device, cc_state);
101 
102    anv_batch_emit(&cmd_buffer->batch,
103                   GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), cc) {
104       cc.CCViewportPointer = cc_state.offset;
105    }
106 }
#endif

109 void
genX(cmd_buffer_enable_pma_fix)110 genX(cmd_buffer_enable_pma_fix)(struct anv_cmd_buffer *cmd_buffer, bool enable)
111 {
112    if (cmd_buffer->state.pma_fix_enabled == enable)
113       return;
114 
115    cmd_buffer->state.pma_fix_enabled = enable;
116 
117    /* According to the Broadwell PIPE_CONTROL documentation, software should
118     * emit a PIPE_CONTROL with the CS Stall and Depth Cache Flush bits set
119     * prior to the LRI.  If stencil buffer writes are enabled, then a Render
120     * Cache Flush is also necessary.
121     *
122     * The Skylake docs say to use a depth stall rather than a command
123     * streamer stall.  However, the hardware seems to violently disagree.
124     * A full command streamer stall seems to be needed in both cases.
125     */
126    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
127       pc.DepthCacheFlushEnable = true;
128       pc.CommandStreamerStallEnable = true;
129       pc.RenderTargetCacheFlushEnable = true;
130    }
131 
132 #if GEN_GEN == 9
133 
134    uint32_t cache_mode;
135    anv_pack_struct(&cache_mode, GENX(CACHE_MODE_0),
136                    .STCPMAOptimizationEnable = enable,
137                    .STCPMAOptimizationEnableMask = true);
138    anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
139       lri.RegisterOffset   = GENX(CACHE_MODE_0_num);
140       lri.DataDWord        = cache_mode;
141    }
142 
143 #elif GEN_GEN == 8
144 
145    uint32_t cache_mode;
146    anv_pack_struct(&cache_mode, GENX(CACHE_MODE_1),
147                    .NPPMAFixEnable = enable,
148                    .NPEarlyZFailsDisable = enable,
149                    .NPPMAFixEnableMask = true,
150                    .NPEarlyZFailsDisableMask = true);
151    anv_batch_emit(&cmd_buffer->batch, GENX(MI_LOAD_REGISTER_IMM), lri) {
152       lri.RegisterOffset   = GENX(CACHE_MODE_1_num);
153       lri.DataDWord        = cache_mode;
154    }
155 
156 #endif /* GEN_GEN == 8 */
157 
158    /* After the LRI, a PIPE_CONTROL with both the Depth Stall and Depth Cache
159     * Flush bits is often necessary.  We do it regardless because it's easier.
160     * The render cache flush is also necessary if stencil writes are enabled.
161     *
162     * Again, the Skylake docs give a different set of flushes but the BDW
163     * flushes seem to work just as well.
164     */
165    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
166       pc.DepthStallEnable = true;
167       pc.DepthCacheFlushEnable = true;
168       pc.RenderTargetCacheFlushEnable = true;
169    }
170 }
171 
172 UNUSED static bool
want_depth_pma_fix(struct anv_cmd_buffer * cmd_buffer)173 want_depth_pma_fix(struct anv_cmd_buffer *cmd_buffer)
174 {
175    assert(GEN_GEN == 8);
176 
177    /* From the Broadwell PRM Vol. 2c CACHE_MODE_1::NP_PMA_FIX_ENABLE:
178     *
179     *    SW must set this bit in order to enable this fix when following
180     *    expression is TRUE.
181     *
182     *    3DSTATE_WM::ForceThreadDispatch != 1 &&
183     *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
184     *    (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
185     *    (3DSTATE_DEPTH_BUFFER::HIZ Enable) &&
186     *    !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) &&
187     *    (3DSTATE_PS_EXTRA::PixelShaderValid) &&
188     *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
189     *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
190     *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
191     *      3DSTATE_WM_HZ_OP::StencilBufferClear) &&
192     *    (3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable) &&
193     *    (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
194     *       3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
195     *       3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
196     *       3DSTATE_PS_BLEND::AlphaTestEnable ||
197     *       3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
198     *      3DSTATE_WM::ForceKillPix != ForceOff &&
199     *      ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
200     *        3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
201     *       (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
202     *        3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
203     *        3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
204     *     (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
205     */
206 
207    /* These are always true:
208     *    3DSTATE_WM::ForceThreadDispatch != 1 &&
209     *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
210     */
211 
212    /* We only enable the PMA fix if we know for certain that HiZ is enabled.
213     * If we don't know whether HiZ is enabled or not, we disable the PMA fix
214     * and there is no harm.
215     *
216     * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
217     * 3DSTATE_DEPTH_BUFFER::HIZ Enable
218     */
219    if (!cmd_buffer->state.hiz_enabled)
220       return false;
221 
222    /* 3DSTATE_PS_EXTRA::PixelShaderValid */
223    struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
224    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
225       return false;
226 
227    /* !(3DSTATE_WM::EDSC_Mode == EDSC_PREPS) */
228    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
229    if (wm_prog_data->early_fragment_tests)
230       return false;
231 
232    /* We never use anv_pipeline for HiZ ops so this is trivially true:
233     *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
234     *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
235     *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
236     *      3DSTATE_WM_HZ_OP::StencilBufferClear)
237     */
238 
239    /* 3DSTATE_WM_DEPTH_STENCIL::DepthTestEnable */
240    if (!pipeline->depth_test_enable)
241       return false;
242 
243    /* (((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
244     *    3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
245     *    3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
246     *    3DSTATE_PS_BLEND::AlphaTestEnable ||
247     *    3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) &&
248     *   3DSTATE_WM::ForceKillPix != ForceOff &&
249     *   ((3DSTATE_WM_DEPTH_STENCIL::DepthWriteEnable &&
250     *     3DSTATE_DEPTH_BUFFER::DEPTH_WRITE_ENABLE) ||
251     *    (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
252     *     3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE &&
253     *     3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE))) ||
254     *  (3DSTATE_PS_EXTRA:: Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
255     */
256    return (pipeline->kill_pixel && (pipeline->writes_depth ||
257                                     pipeline->writes_stencil)) ||
258           wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
259 }
260 
261 UNUSED static bool
want_stencil_pma_fix(struct anv_cmd_buffer * cmd_buffer)262 want_stencil_pma_fix(struct anv_cmd_buffer *cmd_buffer)
263 {
264    if (GEN_GEN > 9)
265       return false;
266    assert(GEN_GEN == 9);
267 
268    /* From the Skylake PRM Vol. 2c CACHE_MODE_1::STC PMA Optimization Enable:
269     *
270     *    Clearing this bit will force the STC cache to wait for pending
271     *    retirement of pixels at the HZ-read stage and do the STC-test for
272     *    Non-promoted, R-computed and Computed depth modes instead of
273     *    postponing the STC-test to RCPFE.
274     *
275     *    STC_TEST_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
276     *                  3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
277     *
278     *    STC_WRITE_EN = 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
279     *                   (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
280     *                    3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
281     *
282     *    COMP_STC_EN = STC_TEST_EN &&
283     *                  3DSTATE_PS_EXTRA::PixelShaderComputesStencil
284     *
285     *    SW parses the pipeline states to generate the following logical
286     *    signal indicating if PMA FIX can be enabled.
287     *
288     *    STC_PMA_OPT =
289     *       3DSTATE_WM::ForceThreadDispatch != 1 &&
290     *       !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0) &&
291     *       3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL &&
292     *       3DSTATE_DEPTH_BUFFER::HIZ Enable &&
293     *       !(3DSTATE_WM::EDSC_Mode == 2) &&
294     *       3DSTATE_PS_EXTRA::PixelShaderValid &&
295     *       !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
296     *         3DSTATE_WM_HZ_OP::DepthBufferResolve ||
297     *         3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
298     *         3DSTATE_WM_HZ_OP::StencilBufferClear) &&
299     *       (COMP_STC_EN || STC_WRITE_EN) &&
300     *       ((3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
301     *         3DSTATE_WM::ForceKillPix == ON ||
302     *         3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
303     *         3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
304     *         3DSTATE_PS_BLEND::AlphaTestEnable ||
305     *         3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
306     *        (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF))
307     */
308 
309    /* These are always true:
310     *    3DSTATE_WM::ForceThreadDispatch != 1 &&
311     *    !(3DSTATE_RASTER::ForceSampleCount != NUMRASTSAMPLES_0)
312     */
313 
314    /* We only enable the PMA fix if we know for certain that HiZ is enabled.
315     * If we don't know whether HiZ is enabled or not, we disable the PMA fix
316     * and there is no harm.
317     *
318     * (3DSTATE_DEPTH_BUFFER::SURFACE_TYPE != NULL) &&
319     * 3DSTATE_DEPTH_BUFFER::HIZ Enable
320     */
321    if (!cmd_buffer->state.hiz_enabled)
322       return false;
323 
324    /* We can't possibly know if HiZ is enabled without the framebuffer */
325    assert(cmd_buffer->state.framebuffer);
326 
327    /* HiZ is enabled so we had better have a depth buffer with HiZ */
328    const struct anv_image_view *ds_iview =
329       anv_cmd_buffer_get_depth_stencil_view(cmd_buffer);
330    assert(ds_iview && ds_iview->image->planes[0].aux_usage == ISL_AUX_USAGE_HIZ);
331 
332    /* 3DSTATE_PS_EXTRA::PixelShaderValid */
333    struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
334    if (!anv_pipeline_has_stage(pipeline, MESA_SHADER_FRAGMENT))
335       return false;
336 
337    /* !(3DSTATE_WM::EDSC_Mode == 2) */
338    const struct brw_wm_prog_data *wm_prog_data = get_wm_prog_data(pipeline);
339    if (wm_prog_data->early_fragment_tests)
340       return false;
341 
342    /* We never use anv_pipeline for HiZ ops so this is trivially true:
343    *    !(3DSTATE_WM_HZ_OP::DepthBufferClear ||
344     *      3DSTATE_WM_HZ_OP::DepthBufferResolve ||
345     *      3DSTATE_WM_HZ_OP::Hierarchical Depth Buffer Resolve Enable ||
346     *      3DSTATE_WM_HZ_OP::StencilBufferClear)
347     */
348 
349    /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
350     * 3DSTATE_WM_DEPTH_STENCIL::StencilTestEnable
351     */
352    const bool stc_test_en =
353       (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
354       pipeline->stencil_test_enable;
355 
356    /* 3DSTATE_STENCIL_BUFFER::STENCIL_BUFFER_ENABLE &&
357     * (3DSTATE_WM_DEPTH_STENCIL::Stencil Buffer Write Enable &&
358     *  3DSTATE_DEPTH_BUFFER::STENCIL_WRITE_ENABLE)
359     */
360    const bool stc_write_en =
361       (ds_iview->image->aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
362       pipeline->writes_stencil;
363 
364    /* STC_TEST_EN && 3DSTATE_PS_EXTRA::PixelShaderComputesStencil */
365    const bool comp_stc_en = stc_test_en && wm_prog_data->computed_stencil;
366 
367    /* COMP_STC_EN || STC_WRITE_EN */
368    if (!(comp_stc_en || stc_write_en))
369       return false;
370 
371    /* (3DSTATE_PS_EXTRA::PixelShaderKillsPixels ||
372     *  3DSTATE_WM::ForceKillPix == ON ||
373     *  3DSTATE_PS_EXTRA::oMask Present to RenderTarget ||
374     *  3DSTATE_PS_BLEND::AlphaToCoverageEnable ||
375     *  3DSTATE_PS_BLEND::AlphaTestEnable ||
376     *  3DSTATE_WM_CHROMAKEY::ChromaKeyKillEnable) ||
377     * (3DSTATE_PS_EXTRA::Pixel Shader Computed Depth mode != PSCDEPTH_OFF)
378     */
379    return pipeline->kill_pixel ||
380           wm_prog_data->computed_depth_mode != PSCDEPTH_OFF;
381 }
382 
383 void
genX(cmd_buffer_flush_dynamic_state)384 genX(cmd_buffer_flush_dynamic_state)(struct anv_cmd_buffer *cmd_buffer)
385 {
386    struct anv_pipeline *pipeline = cmd_buffer->state.gfx.base.pipeline;
387    struct anv_dynamic_state *d = &cmd_buffer->state.gfx.dynamic;
388 
389    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
390                                       ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)) {
391       uint32_t sf_dw[GENX(3DSTATE_SF_length)];
392       struct GENX(3DSTATE_SF) sf = {
393          GENX(3DSTATE_SF_header),
394       };
395 #if GEN_GEN == 8
396       if (cmd_buffer->device->info.is_cherryview) {
397          sf.CHVLineWidth = d->line_width;
398       } else {
399          sf.LineWidth = d->line_width;
400       }
401 #else
402       sf.LineWidth = d->line_width,
403 #endif
404       GENX(3DSTATE_SF_pack)(NULL, sf_dw, &sf);
405       anv_batch_emit_merge(&cmd_buffer->batch, sf_dw, pipeline->gen8.sf);
406    }
407 
408    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
409                                       ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)){
410       uint32_t raster_dw[GENX(3DSTATE_RASTER_length)];
411       struct GENX(3DSTATE_RASTER) raster = {
412          GENX(3DSTATE_RASTER_header),
413          .GlobalDepthOffsetConstant = d->depth_bias.bias,
414          .GlobalDepthOffsetScale = d->depth_bias.slope,
415          .GlobalDepthOffsetClamp = d->depth_bias.clamp
416       };
417       GENX(3DSTATE_RASTER_pack)(NULL, raster_dw, &raster);
418       anv_batch_emit_merge(&cmd_buffer->batch, raster_dw,
419                            pipeline->gen8.raster);
420    }
421 
422    /* Stencil reference values moved from COLOR_CALC_STATE in gen8 to
423     * 3DSTATE_WM_DEPTH_STENCIL in gen9. That means the dirty bits gets split
424     * across different state packets for gen8 and gen9. We handle that by
425     * using a big old #if switch here.
426     */
427 #if GEN_GEN == 8
428    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS |
429                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
430       struct anv_state cc_state =
431          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
432                                             GENX(COLOR_CALC_STATE_length) * 4,
433                                             64);
434       struct GENX(COLOR_CALC_STATE) cc = {
435          .BlendConstantColorRed = d->blend_constants[0],
436          .BlendConstantColorGreen = d->blend_constants[1],
437          .BlendConstantColorBlue = d->blend_constants[2],
438          .BlendConstantColorAlpha = d->blend_constants[3],
439          .StencilReferenceValue = d->stencil_reference.front & 0xff,
440          .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
441       };
442       GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
443 
444       anv_state_flush(cmd_buffer->device, cc_state);
445 
446       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
447          ccp.ColorCalcStatePointer        = cc_state.offset;
448          ccp.ColorCalcStatePointerValid   = true;
449       }
450    }
451 
452    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
453                                       ANV_CMD_DIRTY_RENDER_TARGETS |
454                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
455                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK)) {
456       uint32_t wm_depth_stencil_dw[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
457 
458       struct GENX(3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil) = {
459          GENX(3DSTATE_WM_DEPTH_STENCIL_header),
460 
461          .StencilTestMask = d->stencil_compare_mask.front & 0xff,
462          .StencilWriteMask = d->stencil_write_mask.front & 0xff,
463 
464          .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
465          .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
466 
467          .StencilBufferWriteEnable =
468             (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
469             pipeline->writes_stencil,
470       };
471       GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, wm_depth_stencil_dw,
472                                           &wm_depth_stencil);
473 
474       anv_batch_emit_merge(&cmd_buffer->batch, wm_depth_stencil_dw,
475                            pipeline->gen8.wm_depth_stencil);
476 
477       genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
478                                       want_depth_pma_fix(cmd_buffer));
479    }
480 #else
481    if (cmd_buffer->state.gfx.dirty & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS) {
482       struct anv_state cc_state =
483          anv_cmd_buffer_alloc_dynamic_state(cmd_buffer,
484                                             GENX(COLOR_CALC_STATE_length) * 4,
485                                             64);
486       struct GENX(COLOR_CALC_STATE) cc = {
487          .BlendConstantColorRed = d->blend_constants[0],
488          .BlendConstantColorGreen = d->blend_constants[1],
489          .BlendConstantColorBlue = d->blend_constants[2],
490          .BlendConstantColorAlpha = d->blend_constants[3],
491       };
492       GENX(COLOR_CALC_STATE_pack)(NULL, cc_state.map, &cc);
493 
494       anv_state_flush(cmd_buffer->device, cc_state);
495 
496       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_CC_STATE_POINTERS), ccp) {
497          ccp.ColorCalcStatePointer = cc_state.offset;
498          ccp.ColorCalcStatePointerValid = true;
499       }
500    }
501 
502    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
503                                       ANV_CMD_DIRTY_RENDER_TARGETS |
504                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK |
505                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
506                                       ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE)) {
507       uint32_t dwords[GENX(3DSTATE_WM_DEPTH_STENCIL_length)];
508       struct GENX(3DSTATE_WM_DEPTH_STENCIL) wm_depth_stencil = {
509          GENX(3DSTATE_WM_DEPTH_STENCIL_header),
510 
511          .StencilTestMask = d->stencil_compare_mask.front & 0xff,
512          .StencilWriteMask = d->stencil_write_mask.front & 0xff,
513 
514          .BackfaceStencilTestMask = d->stencil_compare_mask.back & 0xff,
515          .BackfaceStencilWriteMask = d->stencil_write_mask.back & 0xff,
516 
517          .StencilReferenceValue = d->stencil_reference.front & 0xff,
518          .BackfaceStencilReferenceValue = d->stencil_reference.back & 0xff,
519 
520          .StencilBufferWriteEnable =
521             (d->stencil_write_mask.front || d->stencil_write_mask.back) &&
522             pipeline->writes_stencil,
523       };
524       GENX(3DSTATE_WM_DEPTH_STENCIL_pack)(NULL, dwords, &wm_depth_stencil);
525 
526       anv_batch_emit_merge(&cmd_buffer->batch, dwords,
527                            pipeline->gen9.wm_depth_stencil);
528 
529       genX(cmd_buffer_enable_pma_fix)(cmd_buffer,
530                                       want_stencil_pma_fix(cmd_buffer));
531    }
532 #endif
533 
534    if (cmd_buffer->state.gfx.dirty & (ANV_CMD_DIRTY_PIPELINE |
535                                       ANV_CMD_DIRTY_INDEX_BUFFER)) {
536       anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_VF), vf) {
537          vf.IndexedDrawCutIndexEnable  = pipeline->primitive_restart;
538          vf.CutIndex                   = cmd_buffer->state.restart_index;
539       }
540    }
541 
542    cmd_buffer->state.gfx.dirty = 0;
543 }
544 
genX(CmdBindIndexBuffer)545 void genX(CmdBindIndexBuffer)(
546     VkCommandBuffer                             commandBuffer,
547     VkBuffer                                    _buffer,
548     VkDeviceSize                                offset,
549     VkIndexType                                 indexType)
550 {
551    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
552    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
553 
554    static const uint32_t vk_to_gen_index_type[] = {
555       [VK_INDEX_TYPE_UINT16]                    = INDEX_WORD,
556       [VK_INDEX_TYPE_UINT32]                    = INDEX_DWORD,
557    };
558 
559    static const uint32_t restart_index_for_type[] = {
560       [VK_INDEX_TYPE_UINT16]                    = UINT16_MAX,
561       [VK_INDEX_TYPE_UINT32]                    = UINT32_MAX,
562    };
563 
564    cmd_buffer->state.restart_index = restart_index_for_type[indexType];
565 
566    anv_batch_emit(&cmd_buffer->batch, GENX(3DSTATE_INDEX_BUFFER), ib) {
567       ib.IndexFormat                = vk_to_gen_index_type[indexType];
568       ib.MemoryObjectControlState   = GENX(MOCS);
569       ib.BufferStartingAddress      =
570          (struct anv_address) { buffer->bo, buffer->offset + offset };
571       ib.BufferSize                 = buffer->size - offset;
572    }
573 
574    cmd_buffer->state.gfx.dirty |= ANV_CMD_DIRTY_INDEX_BUFFER;
575 }
576 
577 /* Set of stage bits for which are pipelined, i.e. they get queued by the
578  * command streamer for later execution.
579  */
580 #define ANV_PIPELINE_STAGE_PIPELINED_BITS \
581    (VK_PIPELINE_STAGE_VERTEX_INPUT_BIT | \
582     VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | \
583     VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT | \
584     VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT | \
585     VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | \
586     VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT | \
587     VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | \
588     VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | \
589     VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | \
590     VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT | \
591     VK_PIPELINE_STAGE_TRANSFER_BIT | \
592     VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT | \
593     VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT | \
594     VK_PIPELINE_STAGE_ALL_COMMANDS_BIT)
595 
genX(CmdSetEvent)596 void genX(CmdSetEvent)(
597     VkCommandBuffer                             commandBuffer,
598     VkEvent                                     _event,
599     VkPipelineStageFlags                        stageMask)
600 {
601    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
602    ANV_FROM_HANDLE(anv_event, event, _event);
603 
604    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
605       if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
606          pc.StallAtPixelScoreboard = true;
607          pc.CommandStreamerStallEnable = true;
608       }
609 
610       pc.DestinationAddressType  = DAT_PPGTT,
611       pc.PostSyncOperation       = WriteImmediateData,
612       pc.Address = (struct anv_address) {
613          &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
614          event->state.offset
615       };
616       pc.ImmediateData           = VK_EVENT_SET;
617    }
618 }
619 
genX(CmdResetEvent)620 void genX(CmdResetEvent)(
621     VkCommandBuffer                             commandBuffer,
622     VkEvent                                     _event,
623     VkPipelineStageFlags                        stageMask)
624 {
625    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
626    ANV_FROM_HANDLE(anv_event, event, _event);
627 
628    anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
629       if (stageMask & ANV_PIPELINE_STAGE_PIPELINED_BITS) {
630          pc.StallAtPixelScoreboard = true;
631          pc.CommandStreamerStallEnable = true;
632       }
633 
634       pc.DestinationAddressType  = DAT_PPGTT;
635       pc.PostSyncOperation       = WriteImmediateData;
636       pc.Address = (struct anv_address) {
637          &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
638          event->state.offset
639       };
640       pc.ImmediateData           = VK_EVENT_RESET;
641    }
642 }
643 
genX(CmdWaitEvents)644 void genX(CmdWaitEvents)(
645     VkCommandBuffer                             commandBuffer,
646     uint32_t                                    eventCount,
647     const VkEvent*                              pEvents,
648     VkPipelineStageFlags                        srcStageMask,
649     VkPipelineStageFlags                        destStageMask,
650     uint32_t                                    memoryBarrierCount,
651     const VkMemoryBarrier*                      pMemoryBarriers,
652     uint32_t                                    bufferMemoryBarrierCount,
653     const VkBufferMemoryBarrier*                pBufferMemoryBarriers,
654     uint32_t                                    imageMemoryBarrierCount,
655     const VkImageMemoryBarrier*                 pImageMemoryBarriers)
656 {
657    ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
658    for (uint32_t i = 0; i < eventCount; i++) {
659       ANV_FROM_HANDLE(anv_event, event, pEvents[i]);
660 
661       anv_batch_emit(&cmd_buffer->batch, GENX(MI_SEMAPHORE_WAIT), sem) {
662          sem.WaitMode            = PollingMode,
663          sem.CompareOperation    = COMPARE_SAD_EQUAL_SDD,
664          sem.SemaphoreDataDword  = VK_EVENT_SET,
665          sem.SemaphoreAddress = (struct anv_address) {
666             &cmd_buffer->device->dynamic_state_pool.block_pool.bo,
667             event->state.offset
668          };
669       }
670    }
671 
672    genX(CmdPipelineBarrier)(commandBuffer, srcStageMask, destStageMask,
673                             false, /* byRegion */
674                             memoryBarrierCount, pMemoryBarriers,
675                             bufferMemoryBarrierCount, pBufferMemoryBarriers,
676                             imageMemoryBarrierCount, pImageMemoryBarriers);
677 }
678