1 /*-------------------------------------------------------------------------
2 * drawElements Quality Program Reference Renderer
3 * -----------------------------------------------
4 *
5 * Copyright 2014 The Android Open Source Project
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 * http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 *
19 *//*!
20 * \file
21 * \brief Reference implementation for per-fragment operations.
22 *//*--------------------------------------------------------------------*/
23
24 #include "rrFragmentOperations.hpp"
25 #include "tcuVectorUtil.hpp"
26 #include "tcuTextureUtil.hpp"
27 #include <limits>
28
29 using tcu::IVec2;
30 using tcu::Vec3;
31 using tcu::Vec4;
32 using tcu::IVec4;
33 using tcu::UVec4;
34 using tcu::min;
35 using tcu::max;
36 using tcu::clamp;
37 using de::min;
38 using de::max;
39 using de::clamp;
40
41 namespace rr
42 {
43
44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
maskedBitReplace(int oldValue,int newValue,deUint32 mask)45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
46 {
47 return (oldValue & ~mask) | (newValue & mask);
48 }
49
isInsideRect(const IVec2 & point,const WindowRectangle & rect)50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
51 {
52 return de::inBounds(point.x(), rect.left, rect.left + rect.width) &&
53 de::inBounds(point.y(), rect.bottom, rect.bottom + rect.height);
54 }
55
// Divides the RGB components of v by its alpha and returns the result with
// alpha unchanged. When alpha is not greater than zero the result is all
// zeros; the RGB components are asserted to be zero in that case.
static inline Vec4 unpremultiply (const Vec4& v)
{
	const float alpha = v.w();

	// Written as a negated '>' so that a NaN alpha takes this branch too.
	if (!(alpha > 0.0f))
	{
		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
	}

	return Vec4(v.x()/alpha, v.y()/alpha, v.z()/alpha, alpha);
}
66
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const Vec4 & v,const WindowRectangle & r)67 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const IVec4 & v,const WindowRectangle & r)68 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleColorBuffer(const tcu::PixelBufferAccess & dst,const UVec4 & v,const WindowRectangle & r)69 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>()); }
clearMultisampleDepthBuffer(const tcu::PixelBufferAccess & dst,float v,const WindowRectangle & r)70 void clearMultisampleDepthBuffer (const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r) { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
clearMultisampleStencilBuffer(const tcu::PixelBufferAccess & dst,int v,const WindowRectangle & r)71 void clearMultisampleStencilBuffer (const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r) { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); }
72
FragmentProcessor(void)73 FragmentProcessor::FragmentProcessor (void)
74 : m_sampleRegister()
75 {
76 }
77
executeScissorTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const WindowRectangle & scissorRect)78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
79 {
80 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
81 {
82 if (m_sampleRegister[regSampleNdx].isAlive)
83 {
84 int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
85
86 if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
87 m_sampleRegister[regSampleNdx].isAlive = false;
88 }
89 }
90 }
91
executeStencilCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::ConstPixelBufferAccess & stencilBuffer)92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
93 {
94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION) \
95 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
96 { \
97 if (m_sampleRegister[regSampleNdx].isAlive) \
98 { \
99 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
100 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
101 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
102 int maskedRef = stencilState.compMask & clampedStencilRef; \
103 int maskedBuf = stencilState.compMask & stencilBufferValue; \
104 DE_UNREF(maskedRef); \
105 DE_UNREF(maskedBuf); \
106 \
107 m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION); \
108 } \
109 }
110
111 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
112
113 switch (stencilState.func)
114 {
115 case TESTFUNC_NEVER: SAMPLE_REGISTER_STENCIL_COMPARE(false) break;
116 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_STENCIL_COMPARE(true) break;
117 case TESTFUNC_LESS: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef < maskedBuf) break;
118 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf) break;
119 case TESTFUNC_GREATER: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef > maskedBuf) break;
120 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf) break;
121 case TESTFUNC_EQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf) break;
122 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf) break;
123 default:
124 DE_ASSERT(false);
125 }
126
127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
128 }
129
executeStencilSFail(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
131 {
132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION) \
133 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
134 { \
135 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed) \
136 { \
137 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
138 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
139 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
140 \
141 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
142 m_sampleRegister[regSampleNdx].isAlive = false; \
143 } \
144 }
145
146 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
147
148 switch (stencilState.sFail)
149 {
150 case STENCILOP_KEEP: SAMPLE_REGISTER_SFAIL(stencilBufferValue) break;
151 case STENCILOP_ZERO: SAMPLE_REGISTER_SFAIL(0) break;
152 case STENCILOP_REPLACE: SAMPLE_REGISTER_SFAIL(clampedStencilRef) break;
153 case STENCILOP_INCR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break;
154 case STENCILOP_DECR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break;
155 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break;
156 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break;
157 case STENCILOP_INVERT: SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1)) break;
158 default:
159 DE_ASSERT(false);
160 }
161
162 #undef SAMPLE_REGISTER_SFAIL
163 }
164
165
executeDepthBoundsTest(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const float minDepthBound,const float maxDepthBound,const tcu::ConstPixelBufferAccess & depthBuffer)166 void FragmentProcessor::executeDepthBoundsTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)
167 {
168 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
169 {
170 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
171 {
172 if (m_sampleRegister[regSampleNdx].isAlive)
173 {
174 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
175 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
176 const float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
177
178 if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound))
179 m_sampleRegister[regSampleNdx].isAlive = false;
180 }
181 }
182 }
183 else
184 {
185 /* Convert float bounds to target buffer format for comparison */
186
187 deUint32 minDepthBoundUint, maxDepthBoundUint;
188 {
189 deUint32 buffer[2];
190 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());
191
192 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
193 access.setPixDepth(minDepthBound, 0, 0, 0);
194 minDepthBoundUint = access.getPixelUint(0, 0, 0).x();
195 }
196 {
197 deUint32 buffer[2];
198
199 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
200 access.setPixDepth(maxDepthBound, 0, 0, 0);
201 maxDepthBoundUint = access.getPixelUint(0, 0, 0).x();
202 }
203
204 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
205 {
206 if (m_sampleRegister[regSampleNdx].isAlive)
207 {
208 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
209 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
210 const deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();
211
212 if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint))
213 m_sampleRegister[regSampleNdx].isAlive = false;
214 }
215 }
216 }
217 }
218
executeDepthCompare(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,TestFunc depthFunc,const tcu::ConstPixelBufferAccess & depthBuffer)219 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
220 {
221 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION) \
222 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
223 { \
224 if (m_sampleRegister[regSampleNdx].isAlive) \
225 { \
226 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
227 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
228 float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
229 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
230 float sampleDepth = de::clamp(sampleDepthFloat, 0.0f, 1.0f); \
231 \
232 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
233 \
234 DE_UNREF(depthBufferValue); \
235 DE_UNREF(sampleDepth); \
236 } \
237 }
238
239 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION) \
240 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
241 { \
242 if (m_sampleRegister[regSampleNdx].isAlive) \
243 { \
244 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
245 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
246 deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x(); \
247 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \
248 \
249 /* Convert input float to target buffer format for comparison */ \
250 \
251 deUint32 buffer[2]; \
252 \
253 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize()); \
254 \
255 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); \
256 access.setPixDepth(sampleDepthFloat, 0, 0, 0); \
257 deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x(); \
258 \
259 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \
260 \
261 DE_UNREF(depthBufferValue); \
262 DE_UNREF(sampleDepth); \
263 } \
264 }
265
266 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
267 {
268
269 switch (depthFunc)
270 {
271 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_F(false) break;
272 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_F(true) break;
273 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth < depthBufferValue) break;
274 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue) break;
275 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth > depthBufferValue) break;
276 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue) break;
277 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue) break;
278 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue) break;
279 default:
280 DE_ASSERT(false);
281 }
282
283 }
284 else
285 {
286 switch (depthFunc)
287 {
288 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(false) break;
289 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(true) break;
290 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth < depthBufferValue) break;
291 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue) break;
292 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth > depthBufferValue) break;
293 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue) break;
294 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue) break;
295 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue) break;
296 default:
297 DE_ASSERT(false);
298 }
299 }
300
301 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
302 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
303 }
304
executeDepthWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & depthBuffer)305 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
306 {
307 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
308 {
309 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
310 {
311 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
312 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
313 const float clampedDepth = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
314
315 depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
316 }
317 }
318 }
319
executeStencilDpFailAndPass(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const StencilState & stencilState,int numStencilBits,const tcu::PixelBufferAccess & stencilBuffer)320 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
321 {
322 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION) \
323 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
324 { \
325 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION)) \
326 { \
327 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \
328 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \
329 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
330 \
331 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \
332 } \
333 }
334
335 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION) \
336 switch (stencilState.OP_NAME) \
337 { \
338 case STENCILOP_KEEP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue) break; \
339 case STENCILOP_ZERO: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0) break; \
340 case STENCILOP_REPLACE: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef) break; \
341 case STENCILOP_INCR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break; \
342 case STENCILOP_DECR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break; \
343 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break; \
344 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break; \
345 case STENCILOP_INVERT: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1)) break; \
346 default: \
347 DE_ASSERT(false); \
348 }
349
350 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
351
352 SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
353 SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
354
355 #undef SWITCH_DPFAIL_OR_DPPASS
356 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
357 }
358
executeBlendFactorComputeRGB(const Vec4 & blendColor,const BlendState & blendRGBState)359 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
360 {
361 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
362 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
363 { \
364 if (m_sampleRegister[regSampleNdx].isAlive) \
365 { \
366 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
367 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
368 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
369 DE_UNREF(src); \
370 DE_UNREF(src1); \
371 DE_UNREF(dst); \
372 \
373 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \
374 } \
375 }
376
377 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME) \
378 switch (blendRGBState.FUNC_NAME) \
379 { \
380 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f)) break; \
381 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f)) break; \
382 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2)) break; \
383 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2)) break; \
384 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2)) break; \
385 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2)) break; \
386 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w())) break; \
387 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w())) break; \
388 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w())) break; \
389 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w())) break; \
390 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2)) break; \
391 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2)) break; \
392 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w())) break; \
393 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w())) break; \
394 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w()))) break; \
395 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2)) break; \
396 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2)) break; \
397 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w())) break; \
398 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w())) break; \
399 default: \
400 DE_ASSERT(false); \
401 }
402
403 SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
404 SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
405
406 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
407 #undef SAMPLE_REGISTER_BLEND_FACTOR
408 }
409
executeBlendFactorComputeA(const Vec4 & blendColor,const BlendState & blendAState)410 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
411 {
412 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \
413 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
414 { \
415 if (m_sampleRegister[regSampleNdx].isAlive) \
416 { \
417 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \
418 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \
419 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \
420 DE_UNREF(src); \
421 DE_UNREF(src1); \
422 DE_UNREF(dst); \
423 \
424 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \
425 } \
426 }
427
428 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME) \
429 switch (blendAState.FUNC_NAME) \
430 { \
431 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f) break; \
432 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
433 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
434 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
435 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
436 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
437 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \
438 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \
439 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \
440 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \
441 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
442 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
443 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \
444 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \
445 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \
446 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
447 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
448 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \
449 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \
450 default: \
451 DE_ASSERT(false); \
452 }
453
454 SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
455 SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
456
457 #undef SWITCH_SRC_OR_DST_FACTOR_A
458 #undef SAMPLE_REGISTER_BLEND_FACTOR
459 }
460
executeBlend(const BlendState & blendRGBState,const BlendState & blendAState)461 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
462 {
463 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION) \
464 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
465 { \
466 if (m_sampleRegister[regSampleNdx].isAlive) \
467 { \
468 SampleData& sample = m_sampleRegister[regSampleNdx]; \
469 const Vec4& srcColor = sample.clampedBlendSrcColor; \
470 const Vec4& dstColor = sample.clampedBlendDstColor; \
471 \
472 sample.COLOR_NAME = (COLOR_EXPRESSION); \
473 } \
474 }
475
476 switch (blendRGBState.equation)
477 {
478 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
479 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break;
480 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB) break;
481 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
482 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break;
483 default:
484 DE_ASSERT(false);
485 }
486
487 switch (blendAState.equation)
488 {
489 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA) break;
490 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA) break;
491 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA) break;
492 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w())) break;
493 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w())) break;
494 default:
495 DE_ASSERT(false);
496 }
497 #undef SAMPLE_REGISTER_BLENDED_COLOR
498 }
499
500 namespace advblend
501 {
502
multiply(float src,float dst)503 inline float multiply (float src, float dst) { return src*dst; }
screen(float src,float dst)504 inline float screen (float src, float dst) { return src + dst - src*dst; }
darken(float src,float dst)505 inline float darken (float src, float dst) { return de::min(src, dst); }
lighten(float src,float dst)506 inline float lighten (float src, float dst) { return de::max(src, dst); }
difference(float src,float dst)507 inline float difference (float src, float dst) { return de::abs(dst-src); }
exclusion(float src,float dst)508 inline float exclusion (float src, float dst) { return src + dst - 2.0f*src*dst; }
509
overlay(float src,float dst)510 inline float overlay (float src, float dst)
511 {
512 if (dst <= 0.5f)
513 return 2.0f*src*dst;
514 else
515 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
516 }
517
colordodge(float src,float dst)518 inline float colordodge (float src, float dst)
519 {
520 if (dst <= 0.0f)
521 return 0.0f;
522 else if (src < 1.0f)
523 return de::min(1.0f, dst/(1.0f-src));
524 else
525 return 1.0f;
526 }
527
colorburn(float src,float dst)528 inline float colorburn (float src, float dst)
529 {
530 if (dst >= 1.0f)
531 return 1.0f;
532 else if (src > 0.0f)
533 return 1.0f - de::min(1.0f, (1.0f-dst)/src);
534 else
535 return 0.0f;
536 }
537
hardlight(float src,float dst)538 inline float hardlight (float src, float dst)
539 {
540 if (src <= 0.5f)
541 return 2.0f*src*dst;
542 else
543 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
544 }
545
softlight(float src,float dst)546 inline float softlight (float src, float dst)
547 {
548 if (src <= 0.5f)
549 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
550 else if (dst <= 0.25f)
551 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
552 else
553 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
554 }
555
minComp(const Vec3 & v)556 inline float minComp (const Vec3& v)
557 {
558 return de::min(de::min(v.x(), v.y()), v.z());
559 }
560
maxComp(const Vec3 & v)561 inline float maxComp (const Vec3& v)
562 {
563 return de::max(de::max(v.x(), v.y()), v.z());
564 }
565
luminosity(const Vec3 & rgb)566 inline float luminosity (const Vec3& rgb)
567 {
568 return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
569 }
570
saturation(const Vec3 & rgb)571 inline float saturation (const Vec3& rgb)
572 {
573 return maxComp(rgb) - minComp(rgb);
574 }
575
setLum(const Vec3 & cbase,const Vec3 & clum)576 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
577 {
578 const float lbase = luminosity(cbase);
579 const float llum = luminosity(clum);
580 const float ldiff = llum - lbase;
581 const Vec3 color = cbase + Vec3(ldiff);
582 const float minC = minComp(color);
583 const float maxC = maxComp(color);
584
585 if (minC < 0.0f)
586 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
587 else if (maxC > 1.0f)
588 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
589 else
590 return color;
591 }
592
setLumSat(const Vec3 & cbase,const Vec3 & csat,const Vec3 & clum)593 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
594 {
595 const float minbase = minComp(cbase);
596 const float sbase = saturation(cbase);
597 const float ssat = saturation(csat);
598 Vec3 color = Vec3(0.0f);
599
600 if (sbase > 0.0f)
601 color = (cbase - minbase) * ssat / sbase;
602
603 return setLum(color, clum);
604 }
605
606 } // advblend
607
executeAdvancedBlend(BlendEquationAdvanced equation)608 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
609 {
610 using namespace advblend;
611
612 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME) \
613 do { \
614 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
615 { \
616 if (m_sampleRegister[regSampleNdx].isAlive) \
617 { \
618 SampleData& sample = m_sampleRegister[regSampleNdx]; \
619 const Vec4& srcColor = sample.clampedBlendSrcColor; \
620 const Vec4& dstColor = sample.clampedBlendDstColor; \
621 const Vec3& bias = sample.blendSrcFactorRGB; \
622 const float p0 = sample.blendSrcFactorA; \
623 const float r = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \
624 const float g = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \
625 const float b = FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2]; \
626 \
627 sample.blendedRGB = Vec3(r, g, b); \
628 } \
629 } \
630 } while (0)
631
632 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION) \
633 do { \
634 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \
635 { \
636 if (m_sampleRegister[regSampleNdx].isAlive) \
637 { \
638 SampleData& sample = m_sampleRegister[regSampleNdx]; \
639 const Vec3 srcColor = sample.clampedBlendSrcColor.swizzle(0,1,2); \
640 const Vec3 dstColor = sample.clampedBlendDstColor.swizzle(0,1,2); \
641 const Vec3& bias = sample.blendSrcFactorRGB; \
642 const float p0 = sample.blendSrcFactorA; \
643 \
644 sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias; \
645 } \
646 } \
647 } while (0)
648
649 // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
650 // \note clampedBlend*Color contains clamped & unpremultiplied colors
651 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
652 {
653 if (m_sampleRegister[regSampleNdx].isAlive)
654 {
655 SampleData& sample = m_sampleRegister[regSampleNdx];
656 const Vec4& srcColor = sample.clampedBlendSrcColor;
657 const Vec4& dstColor = sample.clampedBlendDstColor;
658 const float srcA = srcColor.w();
659 const float dstA = dstColor.w();
660 const float p0 = srcA*dstA;
661 const float p1 = srcA*(1.0f-dstA);
662 const float p2 = dstA*(1.0f-srcA);
663 const Vec3 bias (srcColor[0]*p1 + dstColor[0]*p2,
664 srcColor[1]*p1 + dstColor[1]*p2,
665 srcColor[2]*p1 + dstColor[2]*p2);
666
667 sample.blendSrcFactorRGB = bias;
668 sample.blendSrcFactorA = p0;
669 sample.blendedA = p0 + p1 + p2;
670 }
671 }
672
673 switch (equation)
674 {
675 case BLENDEQUATION_ADVANCED_MULTIPLY: SAMPLE_REGISTER_ADV_BLEND(multiply); break;
676 case BLENDEQUATION_ADVANCED_SCREEN: SAMPLE_REGISTER_ADV_BLEND(screen); break;
677 case BLENDEQUATION_ADVANCED_OVERLAY: SAMPLE_REGISTER_ADV_BLEND(overlay); break;
678 case BLENDEQUATION_ADVANCED_DARKEN: SAMPLE_REGISTER_ADV_BLEND(darken); break;
679 case BLENDEQUATION_ADVANCED_LIGHTEN: SAMPLE_REGISTER_ADV_BLEND(lighten); break;
680 case BLENDEQUATION_ADVANCED_COLORDODGE: SAMPLE_REGISTER_ADV_BLEND(colordodge); break;
681 case BLENDEQUATION_ADVANCED_COLORBURN: SAMPLE_REGISTER_ADV_BLEND(colorburn); break;
682 case BLENDEQUATION_ADVANCED_HARDLIGHT: SAMPLE_REGISTER_ADV_BLEND(hardlight); break;
683 case BLENDEQUATION_ADVANCED_SOFTLIGHT: SAMPLE_REGISTER_ADV_BLEND(softlight); break;
684 case BLENDEQUATION_ADVANCED_DIFFERENCE: SAMPLE_REGISTER_ADV_BLEND(difference); break;
685 case BLENDEQUATION_ADVANCED_EXCLUSION: SAMPLE_REGISTER_ADV_BLEND(exclusion); break;
686 case BLENDEQUATION_ADVANCED_HSL_HUE: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor)); break;
687 case BLENDEQUATION_ADVANCED_HSL_SATURATION: SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor)); break;
688 case BLENDEQUATION_ADVANCED_HSL_COLOR: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor)); break;
689 case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY: SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor)); break;
690 default:
691 DE_ASSERT(false);
692 }
693
694 #undef SAMPLE_REGISTER_ADV_BLEND
695 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
696 }
697
executeColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)698 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
699 {
700 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
701 {
702 if (m_sampleRegister[regSampleNdx].isAlive)
703 {
704 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
705 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
706 Vec4 combinedColor;
707
708 combinedColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
709 combinedColor.w() = m_sampleRegister[regSampleNdx].blendedA;
710
711 if (isSRGB)
712 combinedColor = tcu::linearToSRGB(combinedColor);
713
714 colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
715 }
716 }
717 }
718
executeRGBA8ColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::PixelBufferAccess & colorBuffer)719 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
720 {
721 const int fragStride = 4;
722 const int xStride = colorBuffer.getRowPitch();
723 const int yStride = colorBuffer.getSlicePitch();
724 deUint8* const basePtr = (deUint8*)colorBuffer.getDataPtr();
725
726 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
727 {
728 if (m_sampleRegister[regSampleNdx].isAlive)
729 {
730 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
731 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
732 deUint8* dstPtr = basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
733
734 dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
735 dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
736 dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
737 dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
738 }
739 }
740 }
741
executeMaskedColorWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const Vec4 & colorMaskFactor,const Vec4 & colorMaskNegationFactor,bool isSRGB,const tcu::PixelBufferAccess & colorBuffer)742 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
743 {
744 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
745 {
746 if (m_sampleRegister[regSampleNdx].isAlive)
747 {
748 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
749 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
750 Vec4 originalColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
751 Vec4 newColor;
752
753 newColor.xyz() = m_sampleRegister[regSampleNdx].blendedRGB;
754 newColor.w() = m_sampleRegister[regSampleNdx].blendedA;
755
756 if (isSRGB)
757 newColor = tcu::linearToSRGB(newColor);
758
759 newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
760
761 colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
762 }
763 }
764 }
765
executeSignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)766 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
767 {
768 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
769 {
770 if (m_sampleRegister[regSampleNdx].isAlive)
771 {
772 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
773 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
774 const IVec4 originalValue = colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
775
776 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
777 }
778 }
779 }
780
executeUnsignedValueWrite(int fragNdxOffset,int numSamplesPerFragment,const Fragment * inputFragments,const tcu::BVec4 & colorMask,const tcu::PixelBufferAccess & colorBuffer)781 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
782 {
783 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
784 {
785 if (m_sampleRegister[regSampleNdx].isAlive)
786 {
787 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
788 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
789 const UVec4 originalValue = colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
790
791 colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
792 }
793 }
794 }
795
render(const rr::MultisamplePixelBufferAccess & msColorBuffer,const rr::MultisamplePixelBufferAccess & msDepthBuffer,const rr::MultisamplePixelBufferAccess & msStencilBuffer,const Fragment * inputFragments,int numFragments,FaceType fragmentFacing,const FragmentOperationState & state)796 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess& msColorBuffer,
797 const rr::MultisamplePixelBufferAccess& msDepthBuffer,
798 const rr::MultisamplePixelBufferAccess& msStencilBuffer,
799 const Fragment* inputFragments,
800 int numFragments,
801 FaceType fragmentFacing,
802 const FragmentOperationState& state)
803 {
804 DE_ASSERT(fragmentFacing < FACETYPE_LAST);
805 DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
806
807 const tcu::PixelBufferAccess& colorBuffer = msColorBuffer.raw();
808 const tcu::PixelBufferAccess& depthBuffer = msDepthBuffer.raw();
809 const tcu::PixelBufferAccess& stencilBuffer = msStencilBuffer.raw();
810
811 bool hasDepth = depthBuffer.getWidth() > 0 && depthBuffer.getHeight() > 0 && depthBuffer.getDepth() > 0;
812 bool hasStencil = stencilBuffer.getWidth() > 0 && stencilBuffer.getHeight() > 0 && stencilBuffer.getDepth() > 0;
813 bool doDepthBoundsTest = hasDepth && state.depthBoundsTestEnabled;
814 bool doDepthTest = hasDepth && state.depthTestEnabled;
815 bool doStencilTest = hasStencil && state.stencilTestEnabled;
816
817 tcu::TextureChannelClass colorbufferClass = tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
818 rr::GenericVecType fragmentDataType = (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
819
820 DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth()) && (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
821 DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight()) && (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
822 DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth()) && (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
823
824 // Combined formats must be separated beforehand
825 DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
826 DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
827
828 int numSamplesPerFragment = colorBuffer.getWidth();
829 int totalNumSamples = numFragments*numSamplesPerFragment;
830 int numSampleGroups = (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
831 const StencilState& stencilState = state.stencilStates[fragmentFacing];
832 Vec4 colorMaskFactor (state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
833 Vec4 colorMaskNegationFactor (state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
834 bool sRGBTarget = state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
835
836 DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
837
838 // Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
839 // the per-sample operations for one group at a time.
840
841 for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
842 {
843 // The index of the fragment of the sample at the beginning of m_sampleRegisters.
844 int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
845
846 // Initialize sample data in the sample register.
847
848 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
849 {
850 int fragNdx = groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
851 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
852
853 if (fragNdx < numFragments)
854 {
855 m_sampleRegister[regSampleNdx].isAlive = (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
856 m_sampleRegister[regSampleNdx].depthPassed = true; // \note This will stay true if depth test is disabled.
857 }
858 else
859 m_sampleRegister[regSampleNdx].isAlive = false;
860 }
861
862 // Scissor test.
863
864 if (state.scissorTestEnabled)
865 executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
866
867 // Depth bounds test.
868
869 if (doDepthBoundsTest)
870 executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound, state.maxDepthBound, depthBuffer);
871
872 // Stencil test.
873
874 if (doStencilTest)
875 {
876 executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
877 executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
878 }
879
880 // Depth test.
881 // \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
882
883 if (doDepthTest)
884 {
885 executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
886
887 if (state.depthMask)
888 executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
889 }
890
891 // Do dpFail and dpPass stencil writes.
892
893 if (doStencilTest)
894 executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
895
896 // Kill the samples that failed depth test.
897
898 if (doDepthTest)
899 {
900 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
901 m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
902 }
903
904 // Paint fragments to target
905
906 switch (fragmentDataType)
907 {
908 case rr::GENERICVECTYPE_FLOAT:
909 {
910 // Select min/max clamping values for blending factors and operands
911 Vec4 minClampValue;
912 Vec4 maxClampValue;
913
914 if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
915 {
916 minClampValue = Vec4(0.0f);
917 maxClampValue = Vec4(1.0f);
918 }
919 else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
920 {
921 minClampValue = Vec4(-1.0f);
922 maxClampValue = Vec4(1.0f);
923 }
924 else
925 {
926 // No clamping
927 minClampValue = Vec4(-std::numeric_limits<float>::infinity());
928 maxClampValue = Vec4(std::numeric_limits<float>::infinity());
929 }
930
931 // Blend calculation - only if using blend.
932 if (state.blendMode == BLENDMODE_STANDARD)
933 {
934 // Put dst color to register, doing srgb-to-linear conversion if needed.
935 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
936 {
937 if (m_sampleRegister[regSampleNdx].isAlive)
938 {
939 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
940 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
941 Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
942
943 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = clamp(frag.value.get<float>(), minClampValue, maxClampValue);
944 m_sampleRegister[regSampleNdx].clampedBlendSrc1Color = clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
945 m_sampleRegister[regSampleNdx].clampedBlendDstColor = clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
946 }
947 }
948
949 // Calculate blend factors to register.
950 executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
951 executeBlendFactorComputeA(state.blendColor, state.blendAState);
952
953 // Compute blended color.
954 executeBlend(state.blendRGBState, state.blendAState);
955 }
956 else if (state.blendMode == BLENDMODE_ADVANCED)
957 {
958 // Unpremultiply colors for blending, and do sRGB->linear if necessary
959 // \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
960 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
961 {
962 if (m_sampleRegister[regSampleNdx].isAlive)
963 {
964 int fragSampleNdx = regSampleNdx % numSamplesPerFragment;
965 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
966 const Vec4 srcColor = frag.value.get<float>();
967 const Vec4 dstColor = colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
968
969 m_sampleRegister[regSampleNdx].clampedBlendSrcColor = unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
970 m_sampleRegister[regSampleNdx].clampedBlendDstColor = unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
971 }
972 }
973
974 executeAdvancedBlend(state.blendEquationAdvaced);
975 }
976 else
977 {
978 // Not using blend - just put values to register as-is.
979 DE_ASSERT(state.blendMode == BLENDMODE_NONE);
980
981 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
982 {
983 if (m_sampleRegister[regSampleNdx].isAlive)
984 {
985 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
986
987 m_sampleRegister[regSampleNdx].blendedRGB = frag.value.get<float>().xyz();
988 m_sampleRegister[regSampleNdx].blendedA = frag.value.get<float>().w();
989 }
990 }
991 }
992
993 // Clamp result values in sample register
994 if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
995 {
996 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
997 {
998 if (m_sampleRegister[regSampleNdx].isAlive)
999 {
1000 m_sampleRegister[regSampleNdx].blendedRGB = clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
1001 m_sampleRegister[regSampleNdx].blendedA = clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
1002 }
1003 }
1004 }
1005
1006 // Finally, write the colors to the color buffer.
1007
1008 if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
1009 {
1010 if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
1011 executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
1012 else
1013 executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
1014 }
1015 else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1016 executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
1017 break;
1018 }
1019 case rr::GENERICVECTYPE_INT32:
1020 // Write fragments
1021 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1022 {
1023 if (m_sampleRegister[regSampleNdx].isAlive)
1024 {
1025 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1026
1027 m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
1028 }
1029 }
1030
1031 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1032 executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1033 break;
1034
1035 case rr::GENERICVECTYPE_UINT32:
1036 // Write fragments
1037 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
1038 {
1039 if (m_sampleRegister[regSampleNdx].isAlive)
1040 {
1041 const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
1042
1043 m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
1044 }
1045 }
1046
1047 if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
1048 executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
1049 break;
1050
1051 default:
1052 DE_ASSERT(DE_FALSE);
1053 }
1054 }
1055 }
1056
1057 } // rr
1058