1 /****************************************************************************
2 * Copyright (C) 2014-2015 Intel Corporation. All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 * @file depthstencil.h
24 *
25 * @brief Implements depth/stencil functionality
26 *
27 ******************************************************************************/
28 #pragma once
29 #include "common/os.h"
30 #include "format_conversion.h"
31
32 INLINE
StencilOp(SWR_STENCILOP op,simdscalar mask,simdscalar stencilRefps,simdscalar & stencilps)33 void StencilOp(SWR_STENCILOP op, simdscalar mask, simdscalar stencilRefps, simdscalar &stencilps)
34 {
35 simdscalari stencil = _simd_castps_si(stencilps);
36
37 switch (op)
38 {
39 case STENCILOP_KEEP:
40 break;
41 case STENCILOP_ZERO:
42 stencilps = _simd_blendv_ps(stencilps, _simd_setzero_ps(), mask);
43 break;
44 case STENCILOP_REPLACE:
45 stencilps = _simd_blendv_ps(stencilps, stencilRefps, mask);
46 break;
47 case STENCILOP_INCRSAT:
48 {
49 simdscalari stencilincr = _simd_adds_epu8(stencil, _simd_set1_epi32(1));
50 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
51 break;
52 }
53 case STENCILOP_DECRSAT:
54 {
55 simdscalari stencildecr = _simd_subs_epu8(stencil, _simd_set1_epi32(1));
56 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
57 break;
58 }
59 case STENCILOP_INCR:
60 {
61 simdscalari stencilincr = _simd_add_epi8(stencil, _simd_set1_epi32(1));
62 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencilincr), mask);
63 break;
64 }
65 case STENCILOP_DECR:
66 {
67 simdscalari stencildecr = _simd_add_epi8(stencil, _simd_set1_epi32((-1) & 0xff));
68 stencilps = _simd_blendv_ps(stencilps, _simd_castsi_ps(stencildecr), mask);
69 break;
70 }
71 case STENCILOP_INVERT:
72 {
73 simdscalar stencilinvert = _simd_andnot_ps(stencilps, _simd_cmpeq_ps(_simd_setzero_ps(), _simd_setzero_ps()));
74 stencilps = _simd_blendv_ps(stencilps, stencilinvert, mask);
75 break;
76 }
77 default:
78 break;
79 }
80 }
81
82
83 template<SWR_FORMAT depthFormatT>
QuantizeDepth(simdscalar depth)84 simdscalar QuantizeDepth(simdscalar depth)
85 {
86 SWR_TYPE depthType = FormatTraits<depthFormatT>::GetType(0);
87 uint32_t depthBpc = FormatTraits<depthFormatT>::GetBPC(0);
88
89 if (depthType == SWR_TYPE_FLOAT)
90 {
91 // assume only 32bit float depth supported
92 SWR_ASSERT(depthBpc == 32);
93
94 // matches shader precision, no quantizing needed
95 return depth;
96 }
97
98 // should be unorm depth if not float
99 SWR_ASSERT(depthType == SWR_TYPE_UNORM);
100
101 float quantize = (float)((1 << depthBpc) - 1);
102 simdscalar result = _simd_mul_ps(depth, _simd_set1_ps(quantize));
103 result = _simd_add_ps(result, _simd_set1_ps(0.5f));
104 result = _simd_round_ps(result, _MM_FROUND_TO_ZERO);
105
106 if (depthBpc > 16)
107 {
108 result = _simd_div_ps(result, _simd_set1_ps(quantize));
109 }
110 else
111 {
112 result = _simd_mul_ps(result, _simd_set1_ps(1.0f / quantize));
113 }
114
115 return result;
116 }
117
118 INLINE
DepthStencilTest(const API_STATE * pState,bool frontFacing,uint32_t viewportIndex,simdscalar interpZ,uint8_t * pDepthBase,simdscalar coverageMask,uint8_t * pStencilBase,simdscalar * pStencilMask)119 simdscalar DepthStencilTest(const API_STATE* pState,
120 bool frontFacing, uint32_t viewportIndex, simdscalar interpZ, uint8_t* pDepthBase, simdscalar coverageMask,
121 uint8_t *pStencilBase, simdscalar* pStencilMask)
122 {
123 static_assert(KNOB_DEPTH_HOT_TILE_FORMAT == R32_FLOAT, "Unsupported depth hot tile format");
124 static_assert(KNOB_STENCIL_HOT_TILE_FORMAT == R8_UINT, "Unsupported stencil hot tile format");
125
126 const SWR_DEPTH_STENCIL_STATE* pDSState = &pState->depthStencilState;
127 const SWR_VIEWPORT* pViewport = &pState->vp[viewportIndex];
128
129 simdscalar depthResult = _simd_set1_ps(-1.0f);
130 simdscalar zbuf;
131
132 // clamp Z to viewport [minZ..maxZ]
133 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
134 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
135 interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));
136
137 if (pDSState->depthTestEnable)
138 {
139 switch (pDSState->depthTestFunc)
140 {
141 case ZFUNC_NEVER: depthResult = _simd_setzero_ps(); break;
142 case ZFUNC_ALWAYS: break;
143 default:
144 zbuf = _simd_load_ps((const float*)pDepthBase);
145 }
146
147 switch (pDSState->depthTestFunc)
148 {
149 case ZFUNC_LE: depthResult = _simd_cmple_ps(interpZ, zbuf); break;
150 case ZFUNC_LT: depthResult = _simd_cmplt_ps(interpZ, zbuf); break;
151 case ZFUNC_GT: depthResult = _simd_cmpgt_ps(interpZ, zbuf); break;
152 case ZFUNC_GE: depthResult = _simd_cmpge_ps(interpZ, zbuf); break;
153 case ZFUNC_EQ: depthResult = _simd_cmpeq_ps(interpZ, zbuf); break;
154 case ZFUNC_NE: depthResult = _simd_cmpneq_ps(interpZ, zbuf); break;
155 }
156 }
157
158 simdscalar stencilMask = _simd_set1_ps(-1.0f);
159
160 if (pDSState->stencilTestEnable)
161 {
162 uint8_t stencilRefValue;
163 uint32_t stencilTestFunc;
164 uint8_t stencilTestMask;
165 if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
166 {
167 stencilRefValue = pDSState->stencilRefValue;
168 stencilTestFunc = pDSState->stencilTestFunc;
169 stencilTestMask = pDSState->stencilTestMask;
170 }
171 else
172 {
173 stencilRefValue = pDSState->backfaceStencilRefValue;
174 stencilTestFunc = pDSState->backfaceStencilTestFunc;
175 stencilTestMask = pDSState->backfaceStencilTestMask;
176 }
177
178 simdvector sbuf;
179 simdscalar stencilWithMask;
180 simdscalar stencilRef;
181 switch(stencilTestFunc)
182 {
183 case ZFUNC_NEVER: stencilMask = _simd_setzero_ps(); break;
184 case ZFUNC_ALWAYS: break;
185 default:
186 LoadSOA<R8_UINT>(pStencilBase, sbuf);
187
188 // apply stencil read mask
189 stencilWithMask = _simd_castsi_ps(_simd_and_si(_simd_castps_si(sbuf.v[0]), _simd_set1_epi32(stencilTestMask)));
190
191 // do stencil compare in float to avoid simd integer emulation in AVX1
192 stencilWithMask = _simd_cvtepi32_ps(_simd_castps_si(stencilWithMask));
193
194 stencilRef = _simd_set1_ps((float)(stencilRefValue & stencilTestMask));
195 break;
196 }
197
198 switch(stencilTestFunc)
199 {
200 case ZFUNC_LE: stencilMask = _simd_cmple_ps(stencilRef, stencilWithMask); break;
201 case ZFUNC_LT: stencilMask = _simd_cmplt_ps(stencilRef, stencilWithMask); break;
202 case ZFUNC_GT: stencilMask = _simd_cmpgt_ps(stencilRef, stencilWithMask); break;
203 case ZFUNC_GE: stencilMask = _simd_cmpge_ps(stencilRef, stencilWithMask); break;
204 case ZFUNC_EQ: stencilMask = _simd_cmpeq_ps(stencilRef, stencilWithMask); break;
205 case ZFUNC_NE: stencilMask = _simd_cmpneq_ps(stencilRef, stencilWithMask); break;
206 }
207 }
208
209 simdscalar depthWriteMask = _simd_and_ps(depthResult, stencilMask);
210 depthWriteMask = _simd_and_ps(depthWriteMask, coverageMask);
211
212 *pStencilMask = stencilMask;
213 return depthWriteMask;
214 }
215
216 INLINE
DepthStencilWrite(const SWR_VIEWPORT * pViewport,const SWR_DEPTH_STENCIL_STATE * pDSState,bool frontFacing,simdscalar interpZ,uint8_t * pDepthBase,const simdscalar & depthMask,const simdscalar & coverageMask,uint8_t * pStencilBase,const simdscalar & stencilMask)217 void DepthStencilWrite(const SWR_VIEWPORT* pViewport, const SWR_DEPTH_STENCIL_STATE* pDSState,
218 bool frontFacing, simdscalar interpZ, uint8_t* pDepthBase, const simdscalar& depthMask, const simdscalar& coverageMask,
219 uint8_t *pStencilBase, const simdscalar& stencilMask)
220 {
221 if (pDSState->depthWriteEnable)
222 {
223 // clamp Z to viewport [minZ..maxZ]
224 simdscalar vMinZ = _simd_broadcast_ss(&pViewport->minZ);
225 simdscalar vMaxZ = _simd_broadcast_ss(&pViewport->maxZ);
226 interpZ = _simd_min_ps(vMaxZ, _simd_max_ps(vMinZ, interpZ));
227
228 simdscalar vMask = _simd_and_ps(depthMask, coverageMask);
229 _simd_maskstore_ps((float*)pDepthBase, _simd_castps_si(vMask), interpZ);
230 }
231
232 if (pDSState->stencilWriteEnable)
233 {
234 simdvector sbuf;
235 LoadSOA<R8_UINT>(pStencilBase, sbuf);
236 simdscalar stencilbuf = sbuf.v[0];
237
238 uint8_t stencilRefValue;
239 uint32_t stencilFailOp;
240 uint32_t stencilPassDepthPassOp;
241 uint32_t stencilPassDepthFailOp;
242 uint8_t stencilWriteMask;
243 if (frontFacing || !pDSState->doubleSidedStencilTestEnable)
244 {
245 stencilRefValue = pDSState->stencilRefValue;
246 stencilFailOp = pDSState->stencilFailOp;
247 stencilPassDepthPassOp = pDSState->stencilPassDepthPassOp;
248 stencilPassDepthFailOp = pDSState->stencilPassDepthFailOp;
249 stencilWriteMask = pDSState->stencilWriteMask;
250 }
251 else
252 {
253 stencilRefValue = pDSState->backfaceStencilRefValue;
254 stencilFailOp = pDSState->backfaceStencilFailOp;
255 stencilPassDepthPassOp = pDSState->backfaceStencilPassDepthPassOp;
256 stencilPassDepthFailOp = pDSState->backfaceStencilPassDepthFailOp;
257 stencilWriteMask = pDSState->backfaceStencilWriteMask;
258 }
259
260 simdscalar stencilps = stencilbuf;
261 simdscalar stencilRefps = _simd_castsi_ps(_simd_set1_epi32(stencilRefValue));
262
263 simdscalar stencilFailMask = _simd_andnot_ps(stencilMask, coverageMask);
264 simdscalar stencilPassDepthPassMask = _simd_and_ps(stencilMask, depthMask);
265 simdscalar stencilPassDepthFailMask = _simd_and_ps(stencilMask, _simd_andnot_ps(depthMask, _simd_set1_ps(-1)));
266
267 simdscalar origStencil = stencilps;
268
269 StencilOp((SWR_STENCILOP)stencilFailOp, stencilFailMask, stencilRefps, stencilps);
270 StencilOp((SWR_STENCILOP)stencilPassDepthFailOp, stencilPassDepthFailMask, stencilRefps, stencilps);
271 StencilOp((SWR_STENCILOP)stencilPassDepthPassOp, stencilPassDepthPassMask, stencilRefps, stencilps);
272
273 // apply stencil write mask
274 simdscalari vWriteMask = _simd_set1_epi32(stencilWriteMask);
275 stencilps = _simd_and_ps(stencilps, _simd_castsi_ps(vWriteMask));
276 stencilps = _simd_or_ps(_simd_andnot_ps(_simd_castsi_ps(vWriteMask), origStencil), stencilps);
277
278 simdvector stencilResult;
279 stencilResult.v[0] = _simd_blendv_ps(origStencil, stencilps, coverageMask);
280 StoreSOA<R8_UINT>(stencilResult, pStencilBase);
281 }
282
283 }
284