/* * Copyright (c) 2022 Intel Corporation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS * IN THE SOFTWARE. */ #ifndef INTEL_GENX_STATE_BRW_H #define INTEL_GENX_STATE_BRW_H #ifndef GFX_VERx10 #error This file should only be included by genX files. #endif #include #include "dev/intel_device_info.h" #include "genxml/gen_macros.h" #ifdef __cplusplus extern "C" { #endif #if GFX_VER >= 7 static inline void intel_set_ps_dispatch_state(struct GENX(3DSTATE_PS) *ps, const struct intel_device_info *devinfo, const struct brw_wm_prog_data *prog_data, unsigned rasterization_samples, enum intel_msaa_flags msaa_flags) { assert(rasterization_samples != 0); bool enable_8 = prog_data->dispatch_8; bool enable_16 = prog_data->dispatch_16; bool enable_32 = prog_data->dispatch_32; uint8_t dispatch_multi = prog_data->dispatch_multi; #if GFX_VER >= 20 if (ps->RenderTargetFastClearEnable) { /* Bspec 57340 (r59562): * * Clearing shader must use SIMD16 dispatch mode. * * The spec doesn't state if a fast-clear shader can be multi-poly. We * just assume it can't. */ assert(enable_16); enable_32 = enable_8 = false; dispatch_multi = 0; } #elif GFX_VER >= 9 /* SKL PRMs, Volume 2a: Command Reference: Instructions: * 3DSTATE_PS_BODY::8 Pixel Dispatch Enable: * * "When Render Target Fast Clear Enable is ENABLED or Render Target * Resolve Type = RESOLVE_PARTIAL or RESOLVE_FULL, this bit must be * DISABLED." */ if (ps->RenderTargetFastClearEnable || ps->RenderTargetResolveType == RESOLVE_PARTIAL || ps->RenderTargetResolveType == RESOLVE_FULL) enable_8 = false; #elif GFX_VER == 8 /* BDW has the same wording as SKL, except some of the fields mentioned * don't exist... */ if (ps->RenderTargetFastClearEnable || ps->RenderTargetResolveEnable) enable_8 = false; #endif const bool is_persample_dispatch = brw_wm_prog_data_is_persample(prog_data, msaa_flags); if (is_persample_dispatch) { /* TGL PRMs, Volume 2d: Command Reference: Structures: * 3DSTATE_PS_BODY::32 Pixel Dispatch Enable: * * "Must not be enabled when dispatch rate is sample AND NUM_MULTISAMPLES > 1." */ if (GFX_VER >= 12 && rasterization_samples > 1) enable_32 = false; /* Starting with SandyBridge (where we first get MSAA), the different * pixel dispatch combinations are grouped into classifications A * through F (SNB PRM Vol. 2 Part 1 Section 7.7.1). On most hardware * generations, the only configurations supporting persample dispatch * are those in which only one dispatch width is enabled. * * The Gfx12 hardware spec has a similar dispatch grouping table, but * the following conflicting restriction applies (from the page on * "Structure_3DSTATE_PS_BODY"), so we need to keep the SIMD16 shader: * * "SIMD32 may only be enabled if SIMD16 or (dual)SIMD8 is also * enabled." */ if (enable_32 || enable_16) enable_8 = false; if (GFX_VER < 12 && enable_32) enable_16 = false; } /* The docs for 3DSTATE_PS::32 Pixel Dispatch Enable say: * * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, * SIMD32 Dispatch must not be enabled for PER_PIXEL dispatch * mode." * * 16x MSAA only exists on Gfx9+, so we can skip this on Gfx8. */ if (GFX_VER >= 9 && rasterization_samples == 16 && !is_persample_dispatch) { assert(enable_8 || enable_16); enable_32 = false; } assert(enable_8 || enable_16 || enable_32 || (GFX_VER >= 12 && dispatch_multi)); assert(!dispatch_multi || (GFX_VER >= 12 && !enable_8)); #if GFX_VER >= 20 if (dispatch_multi) { ps->Kernel0Enable = true; ps->Kernel0SIMDWidth = (dispatch_multi == 32 ? PS_SIMD32 : PS_SIMD16); ps->Kernel0MaximumPolysperThread = prog_data->max_polygons - 1; switch (dispatch_multi / prog_data->max_polygons) { case 8: ps->Kernel0PolyPackingPolicy = POLY_PACK8_FIXED; break; case 16: ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED; break; default: unreachable("Invalid polygon width"); } } else if (enable_16) { ps->Kernel0Enable = true; ps->Kernel0SIMDWidth = PS_SIMD16; ps->Kernel0PolyPackingPolicy = POLY_PACK16_FIXED; } if (enable_32) { ps->Kernel1Enable = true; ps->Kernel1SIMDWidth = PS_SIMD32; } else if (enable_16 && dispatch_multi == 16) { ps->Kernel1Enable = true; ps->Kernel1SIMDWidth = PS_SIMD16; } #else ps->_8PixelDispatchEnable = enable_8 || (GFX_VER == 12 && dispatch_multi); ps->_16PixelDispatchEnable = enable_16; ps->_32PixelDispatchEnable = enable_32; #endif } #endif #ifdef __cplusplus } #endif #endif /* INTEL_GENX_STATE_BRW_H */