• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2016 The Khronos Group Inc.
6  * Copyright (c) 2016 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Shader derivate function tests.
24  *
25  * \todo [2013-06-25 pyry] Missing features:
26  *  - lines and points
27  *  - projected coordinates
28  *  - continuous non-trivial functions (sin, exp)
29  *  - non-continuous functions (step)
30  *//*--------------------------------------------------------------------*/
31 
32 #include "vktShaderRenderDerivateTests.hpp"
33 #include "vktShaderRender.hpp"
34 #include "vkImageUtil.hpp"
35 #include "vkQueryUtil.hpp"
36 
37 #include "gluTextureUtil.hpp"
38 
39 #include "tcuStringTemplate.hpp"
40 #include "tcuSurface.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuVectorUtil.hpp"
43 #include "tcuTextureUtil.hpp"
44 #include "tcuRGBA.hpp"
45 #include "tcuFloat.hpp"
46 #include "tcuInterval.hpp"
47 
48 #include "deUniquePtr.hpp"
49 #include "glwEnums.hpp"
50 
51 #include <sstream>
52 #include <string>
53 
54 namespace vkt
55 {
56 namespace sr
57 {
58 namespace
59 {
60 
61 using namespace vk;
62 
63 using std::vector;
64 using std::string;
65 using std::map;
66 using tcu::TestLog;
67 using std::ostringstream;
68 
// Compile-time constants shared by the test cases and the verifiers in this file.
enum
{
	VIEWPORT_WIDTH		= 99,	// width of the render target, in pixels
	VIEWPORT_HEIGHT		= 133,	// height of the render target, in pixels
	MAX_FAILED_MESSAGES	= 10	// verifiers stop writing per-failure log messages after this many
};
75 
// Derivative functions covered by the tests. Values are consecutive starting
// from zero; DERIVATE_LAST is the number of functions, not a function itself.
enum DerivateFunc
{
	// Derivatives along x
	DERIVATE_DFDX = 0,
	DERIVATE_DFDXFINE,
	DERIVATE_DFDXCOARSE,
	DERIVATE_DFDXSUBGROUP,

	// Derivatives along y
	DERIVATE_DFDY,
	DERIVATE_DFDYFINE,
	DERIVATE_DFDYCOARSE,
	DERIVATE_DFDYSUBGROUP,

	// fwidth variants
	DERIVATE_FWIDTH,
	DERIVATE_FWIDTHFINE,
	DERIVATE_FWIDTHCOARSE,

	DERIVATE_LAST
};
94 
// Kind of render target the derivative values are written to.
enum SurfaceType
{
	SURFACETYPE_UNORM_FBO = 0,	// normalized 8-bit target
	SURFACETYPE_FLOAT_FBO,		// \note Uses RGBA32UI fbo actually, since FP rendertargets are not in core spec.

	SURFACETYPE_LAST
};
102 
103 // Utilities
104 
getDerivateFuncName(DerivateFunc func)105 static const char* getDerivateFuncName (DerivateFunc func)
106 {
107 	switch (func)
108 	{
109 		case DERIVATE_DFDX:				return "dFdx";
110 		case DERIVATE_DFDXFINE:			return "dFdxFine";
111 		case DERIVATE_DFDXCOARSE:		return "dFdxCoarse";
112 		case DERIVATE_DFDXSUBGROUP:		return "dFdxSubgroup";
113 		case DERIVATE_DFDY:				return "dFdy";
114 		case DERIVATE_DFDYFINE:			return "dFdyFine";
115 		case DERIVATE_DFDYCOARSE:		return "dFdyCoarse";
116 		case DERIVATE_DFDYSUBGROUP:		return "dFdySubgroup";
117 		case DERIVATE_FWIDTH:			return "fwidth";
118 		case DERIVATE_FWIDTHFINE:		return "fwidthFine";
119 		case DERIVATE_FWIDTHCOARSE:		return "fwidthCoarse";
120 		default:
121 			DE_ASSERT(false);
122 			return DE_NULL;
123 	}
124 }
125 
getDerivateFuncCaseName(DerivateFunc func)126 static const char* getDerivateFuncCaseName (DerivateFunc func)
127 {
128 	switch (func)
129 	{
130 		case DERIVATE_DFDX:				return "dfdx";
131 		case DERIVATE_DFDXFINE:			return "dfdxfine";
132 		case DERIVATE_DFDXCOARSE:		return "dfdxcoarse";
133 		case DERIVATE_DFDXSUBGROUP:		return "dfdxsubgroup";
134 		case DERIVATE_DFDY:				return "dfdy";
135 		case DERIVATE_DFDYFINE:			return "dfdyfine";
136 		case DERIVATE_DFDYCOARSE:		return "dfdycoarse";
137 		case DERIVATE_DFDYSUBGROUP:		return "dfdysubgroup";
138 		case DERIVATE_FWIDTH:			return "fwidth";
139 		case DERIVATE_FWIDTHFINE:		return "fwidthfine";
140 		case DERIVATE_FWIDTHCOARSE:		return "fwidthcoarse";
141 		default:
142 			DE_ASSERT(false);
143 			return DE_NULL;
144 	}
145 }
146 
isDfdxFunc(DerivateFunc func)147 static inline bool isDfdxFunc (DerivateFunc func)
148 {
149 	return func == DERIVATE_DFDX || func == DERIVATE_DFDXFINE || func == DERIVATE_DFDXCOARSE || func == DERIVATE_DFDXSUBGROUP;
150 }
151 
isDfdyFunc(DerivateFunc func)152 static inline bool isDfdyFunc (DerivateFunc func)
153 {
154 	return func == DERIVATE_DFDY || func == DERIVATE_DFDYFINE || func == DERIVATE_DFDYCOARSE || func == DERIVATE_DFDYSUBGROUP;
155 }
156 
isFwidthFunc(DerivateFunc func)157 static inline bool isFwidthFunc (DerivateFunc func)
158 {
159 	return func == DERIVATE_FWIDTH || func == DERIVATE_FWIDTHFINE || func == DERIVATE_FWIDTHCOARSE;
160 }
161 
isSubgroupFunc(DerivateFunc func)162 static inline bool isSubgroupFunc (DerivateFunc func)
163 {
164 	return func == DERIVATE_DFDXSUBGROUP || func == DERIVATE_DFDYSUBGROUP;
165 }
166 
getDerivateMask(glu::DataType type)167 static inline tcu::BVec4 getDerivateMask (glu::DataType type)
168 {
169 	switch (type)
170 	{
171 		case glu::TYPE_FLOAT:		return tcu::BVec4(true, false, false, false);
172 		case glu::TYPE_FLOAT_VEC2:	return tcu::BVec4(true, true, false, false);
173 		case glu::TYPE_FLOAT_VEC3:	return tcu::BVec4(true, true, true, false);
174 		case glu::TYPE_FLOAT_VEC4:	return tcu::BVec4(true, true, true, true);
175 		default:
176 			DE_ASSERT(false);
177 			return tcu::BVec4(true);
178 	}
179 }
180 
isSkippedPixel(const tcu::ConstPixelBufferAccess & surface,int x,int y)181 static inline bool isSkippedPixel (const tcu::ConstPixelBufferAccess& surface, int x, int y)
182 {
183 	const tcu::Vec4 skipValue(0.7843f, 0.2039f, 0.4706f, 0.0f);
184 	const tcu::Vec4 value = surface.getPixel(x, y);
185 	return tcu::allEqual(tcu::lessThanEqual(tcu::abs(value - skipValue), tcu::Vec4(0.01f)), tcu::BVec4(true));
186 }
187 
// Reads the pixel at (x, y) and undoes the scale/bias encoding:
// result = (pixel - derivBias) / derivScale, component-wise.
static inline tcu::Vec4 readDerivate (const tcu::ConstPixelBufferAccess& surface, const tcu::Vec4& derivScale, const tcu::Vec4& derivBias, int x, int y)
{
	const tcu::Vec4 stored		= surface.getPixel(x, y);
	const tcu::Vec4 unbiased	= stored - derivBias;

	return unbiased / derivScale;
}
192 
// Returns the raw exponent bit-field of each of the four components of v,
// interpreted as 32-bit floats.
static inline tcu::UVec4 getCompExpBits (const tcu::Vec4& v)
{
	tcu::UVec4 expBits;

	for (int compNdx = 0; compNdx < 4; compNdx++)
		expBits[compNdx] = tcu::Float32(v[compNdx]).exponentBits();

	return expBits;
}
200 
computeFloatingPointError(const float value,const int numAccurateBits)201 float computeFloatingPointError (const float value, const int numAccurateBits)
202 {
203 	const int		numGarbageBits	= 23-numAccurateBits;
204 	const deUint32	mask			= (1u<<numGarbageBits)-1u;
205 	const int		exp				= tcu::Float32(value).exponent();
206 
207 	return tcu::Float32::construct(+1, exp, (1u<<23) | mask).asFloat() - tcu::Float32::construct(+1, exp, 1u<<23).asFloat();
208 }
209 
getNumMantissaBits(const glu::Precision precision)210 static int getNumMantissaBits (const glu::Precision precision)
211 {
212 	switch (precision)
213 	{
214 		case glu::PRECISION_HIGHP:		return 23;
215 		case glu::PRECISION_MEDIUMP:	return 10;
216 		case glu::PRECISION_LOWP:		return 6;
217 		default:
218 			DE_ASSERT(false);
219 			return 0;
220 	}
221 }
222 
getMinExponent(const glu::Precision precision)223 static int getMinExponent (const glu::Precision precision)
224 {
225 	switch (precision)
226 	{
227 		case glu::PRECISION_HIGHP:		return -126;
228 		case glu::PRECISION_MEDIUMP:	return -14;
229 		case glu::PRECISION_LOWP:		return -8;
230 		default:
231 			DE_ASSERT(false);
232 			return 0;
233 	}
234 }
235 
getSingleULPForExponent(int exp,int numMantissaBits)236 static float getSingleULPForExponent (int exp, int numMantissaBits)
237 {
238 	if (numMantissaBits > 0)
239 	{
240 		DE_ASSERT(numMantissaBits <= 23);
241 
242 		const int ulpBitNdx = 23-numMantissaBits;
243 		return tcu::Float32::construct(+1, exp, (1<<23) | (1 << ulpBitNdx)).asFloat() - tcu::Float32::construct(+1, exp, (1<<23)).asFloat();
244 	}
245 	else
246 	{
247 		DE_ASSERT(numMantissaBits == 0);
248 		return tcu::Float32::construct(+1, exp, (1<<23)).asFloat();
249 	}
250 }
251 
getSingleULPForValue(float value,int numMantissaBits)252 static float getSingleULPForValue (float value, int numMantissaBits)
253 {
254 	const int exp = tcu::Float32(value).exponent();
255 	return getSingleULPForExponent(exp, numMantissaBits);
256 }
257 
convertFloatFlushToZeroRtn(float value,int minExponent,int numAccurateBits)258 static float convertFloatFlushToZeroRtn (float value, int minExponent, int numAccurateBits)
259 {
260 	if (value == 0.0f)
261 	{
262 		return 0.0f;
263 	}
264 	else
265 	{
266 		const tcu::Float32	inputFloat			= tcu::Float32(value);
267 		const int			numTruncatedBits	= 23-numAccurateBits;
268 		const deUint32		truncMask			= (1u<<numTruncatedBits)-1u;
269 
270 		if (value > 0.0f)
271 		{
272 			if (value > 0.0f && tcu::Float32(value).exponent() < minExponent)
273 			{
274 				// flush to zero if possible
275 				return 0.0f;
276 			}
277 			else
278 			{
279 				// just mask away non-representable bits
280 				return tcu::Float32::construct(+1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask).asFloat();
281 			}
282 		}
283 		else
284 		{
285 			if (inputFloat.mantissa() & truncMask)
286 			{
287 				// decrement one ulp if truncated bits are non-zero (i.e. if value is not representable)
288 				return tcu::Float32::construct(-1, inputFloat.exponent(), inputFloat.mantissa() & ~truncMask).asFloat() - getSingleULPForExponent(inputFloat.exponent(), numAccurateBits);
289 			}
290 			else
291 			{
292 				// value is representable, no need to do anything
293 				return value;
294 			}
295 		}
296 	}
297 }
298 
convertFloatFlushToZeroRtp(float value,int minExponent,int numAccurateBits)299 static float convertFloatFlushToZeroRtp (float value, int minExponent, int numAccurateBits)
300 {
301 	return -convertFloatFlushToZeroRtn(-value, minExponent, numAccurateBits);
302 }
303 
addErrorUlp(float value,float numUlps,int numMantissaBits)304 static float addErrorUlp (float value, float numUlps, int numMantissaBits)
305 {
306 	return value + numUlps * getSingleULPForValue(value, numMantissaBits);
307 }
308 
enum
{
	// Number of mantissa bits allowed to be lost in varying interpolation.
	INTERPOLATION_LOST_BITS = 3
};
313 
getDerivateThreshold(const glu::Precision precision,const tcu::Vec4 & valueMin,const tcu::Vec4 & valueMax,const tcu::Vec4 & expectedDerivate)314 static inline tcu::Vec4 getDerivateThreshold (const glu::Precision precision, const tcu::Vec4& valueMin, const tcu::Vec4& valueMax, const tcu::Vec4& expectedDerivate)
315 {
316 	const int			baseBits		= getNumMantissaBits(precision);
317 	const tcu::UVec4	derivExp		= getCompExpBits(expectedDerivate);
318 	const tcu::UVec4	maxValueExp		= max(getCompExpBits(valueMin), getCompExpBits(valueMax));
319 	const tcu::UVec4	numBitsLost		= maxValueExp - min(maxValueExp, derivExp);
320 	const tcu::IVec4	numAccurateBits	= max(baseBits - numBitsLost.asInt() - (int)INTERPOLATION_LOST_BITS, tcu::IVec4(0));
321 
322 	return tcu::Vec4(computeFloatingPointError(expectedDerivate[0], numAccurateBits[0]),
323 					 computeFloatingPointError(expectedDerivate[1], numAccurateBits[1]),
324 					 computeFloatingPointError(expectedDerivate[2], numAccurateBits[2]),
325 					 computeFloatingPointError(expectedDerivate[3], numAccurateBits[3]));
326 }
327 
328 struct LogVecComps
329 {
330 	const tcu::Vec4&	v;
331 	int					numComps;
332 
LogVecCompsvkt::sr::__anona541e7d70111::LogVecComps333 	LogVecComps (const tcu::Vec4& v_, int numComps_)
334 		: v			(v_)
335 		, numComps	(numComps_)
336 	{
337 	}
338 };
339 
operator <<(std::ostream & str,const LogVecComps & v)340 std::ostream& operator<< (std::ostream& str, const LogVecComps& v)
341 {
342 	DE_ASSERT(de::inRange(v.numComps, 1, 4));
343 	if (v.numComps == 1)		return str << v.v[0];
344 	else if (v.numComps == 2)	return str << v.v.toWidth<2>();
345 	else if (v.numComps == 3)	return str << v.v.toWidth<3>();
346 	else						return str << v.v;
347 }
348 
// Controls whether a verifier writes messages to the test log.
enum VerificationLogging
{
	LOG_ALL = 0,	// log expectations and failures
	LOG_NOTHING		// verify silently
};
354 
verifyConstantDerivate(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,const tcu::Vec4 & reference,const tcu::Vec4 & threshold,const tcu::Vec4 & scale,const tcu::Vec4 & bias,VerificationLogging logPolicy=LOG_ALL)355 static bool verifyConstantDerivate (tcu::TestLog&						log,
356 									const tcu::ConstPixelBufferAccess&	result,
357 									const tcu::PixelBufferAccess&		errorMask,
358 									glu::DataType						dataType,
359 									const tcu::Vec4&					reference,
360 									const tcu::Vec4&					threshold,
361 									const tcu::Vec4&					scale,
362 									const tcu::Vec4&					bias,
363 									VerificationLogging					logPolicy = LOG_ALL)
364 {
365 	const int			numComps		= glu::getDataTypeFloatScalars(dataType);
366 	const tcu::BVec4	mask			= tcu::logicalNot(getDerivateMask(dataType));
367 	int					numFailedPixels	= 0;
368 
369 	if (logPolicy == LOG_ALL)
370 		log << TestLog::Message << "Expecting " << LogVecComps(reference, numComps) << " with threshold " << LogVecComps(threshold, numComps) << TestLog::EndMessage;
371 
372 	for (int y = 0; y < result.getHeight(); y++)
373 	{
374 		for (int x = 0; x < result.getWidth(); x++)
375 		{
376 			if (isSkippedPixel(result, x, y))
377 				continue;
378 
379 			const tcu::Vec4		resDerivate		= readDerivate(result, scale, bias, x, y);
380 			const bool			isOk			= tcu::allEqual(tcu::logicalOr(tcu::lessThanEqual(tcu::abs(reference - resDerivate), threshold), mask), tcu::BVec4(true));
381 
382 			if (!isOk)
383 			{
384 				if (numFailedPixels < MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
385 					log << TestLog::Message << "FAIL: got " << LogVecComps(resDerivate, numComps)
386 											<< ", diff = " << LogVecComps(tcu::abs(reference - resDerivate), numComps)
387 											<< ", at x = " << x << ", y = " << y
388 						<< TestLog::EndMessage;
389 				numFailedPixels += 1;
390 				errorMask.setPixel(tcu::RGBA::red().toVec(), x, y);
391 			}
392 		}
393 	}
394 
395 	if (numFailedPixels >= MAX_FAILED_MESSAGES && logPolicy == LOG_ALL)
396 		log << TestLog::Message << "..." << TestLog::EndMessage;
397 
398 	if (numFailedPixels > 0 && logPolicy == LOG_ALL)
399 		log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
400 
401 	return numFailedPixels == 0;
402 }
403 
404 struct Linear2DFunctionEvaluator
405 {
406 	tcu::Matrix<float, 4, 3> matrix;
407 
408 	//      .-----.
409 	//      | s_x |
410 	//  M x | s_y |
411 	//      | 1.0 |
412 	//      '-----'
413 	tcu::Vec4 evaluateAt (float screenX, float screenY) const;
414 };
415 
evaluateAt(float screenX,float screenY) const416 tcu::Vec4 Linear2DFunctionEvaluator::evaluateAt (float screenX, float screenY) const
417 {
418 	const tcu::Vec3 position(screenX, screenY, 1.0f);
419 	return matrix * position;
420 }
421 
reverifyConstantDerivateWithFlushRelaxations(tcu::TestLog & log,const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask,glu::DataType dataType,glu::Precision precision,const tcu::Vec4 & derivScale,const tcu::Vec4 & derivBias,const tcu::Vec4 & surfaceThreshold,DerivateFunc derivateFunc,const Linear2DFunctionEvaluator & function)422 static bool reverifyConstantDerivateWithFlushRelaxations (tcu::TestLog&							log,
423 														  const tcu::ConstPixelBufferAccess&	result,
424 														  const tcu::PixelBufferAccess&			errorMask,
425 														  glu::DataType							dataType,
426 														  glu::Precision						precision,
427 														  const tcu::Vec4&						derivScale,
428 														  const tcu::Vec4&						derivBias,
429 														  const tcu::Vec4&						surfaceThreshold,
430 														  DerivateFunc							derivateFunc,
431 														  const Linear2DFunctionEvaluator&		function)
432 {
433 	DE_ASSERT(result.getWidth() == errorMask.getWidth());
434 	DE_ASSERT(result.getHeight() == errorMask.getHeight());
435 	DE_ASSERT(isDfdxFunc(derivateFunc) || isDfdyFunc(derivateFunc));
436 
437 	const tcu::IVec4	red						(255, 0, 0, 255);
438 	const tcu::IVec4	green					(0, 255, 0, 255);
439 	const float			divisionErrorUlps		= 2.5f;
440 
441 	const int			numComponents			= glu::getDataTypeFloatScalars(dataType);
442 	const int			numBits					= getNumMantissaBits(precision);
443 	const int			minExponent				= getMinExponent(precision);
444 
445 	const int			numVaryingSampleBits	= numBits - INTERPOLATION_LOST_BITS;
446 	int					numFailedPixels			= 0;
447 
448 	tcu::clear(errorMask, green);
449 
450 	// search for failed pixels
451 	for (int y = 0; y < result.getHeight(); ++y)
452 	for (int x = 0; x < result.getWidth(); ++x)
453 	{
454 		if (isSkippedPixel(result, x, y))
455 			continue;
456 
457 		//                 flushToZero?(f2z?(functionValueCurrent) - f2z?(functionValueBefore))
458 		// flushToZero? ( ------------------------------------------------------------------------ +- 2.5 ULP )
459 		//                                                  dx
460 
461 		const tcu::Vec4	resultDerivative		= readDerivate(result, derivScale, derivBias, x, y);
462 
463 		// sample at the front of the back pixel and the back of the front pixel to cover the whole area of
464 		// legal sample positions. In general case this is NOT OK, but we know that the target funtion is
465 		// (mostly*) linear which allows us to take the sample points at arbitrary points. This gets us the
466 		// maximum difference possible in exponents which are used in error bound calculations.
467 		// * non-linearity may happen around zero or with very high function values due to subnorms not
468 		//   behaving well.
469 		const tcu::Vec4	functionValueForward	= (isDfdxFunc(derivateFunc))
470 													? (function.evaluateAt((float)x + 2.0f, (float)y + 0.5f))
471 													: (function.evaluateAt((float)x + 0.5f, (float)y + 2.0f));
472 		const tcu::Vec4	functionValueBackward	= (isDfdyFunc(derivateFunc))
473 													? (function.evaluateAt((float)x - 1.0f, (float)y + 0.5f))
474 													: (function.evaluateAt((float)x + 0.5f, (float)y - 1.0f));
475 
476 		bool	anyComponentFailed				= false;
477 
478 		// check components separately
479 		for (int c = 0; c < numComponents; ++c)
480 		{
481 			// Simulate interpolation. Add allowed interpolation error and round to target precision. Allow one half ULP (i.e. correct rounding)
482 			const tcu::Interval	forwardComponent		(convertFloatFlushToZeroRtn(addErrorUlp((float)functionValueForward[c],  -0.5f, numVaryingSampleBits), minExponent, numBits),
483 														 convertFloatFlushToZeroRtp(addErrorUlp((float)functionValueForward[c],  +0.5f, numVaryingSampleBits), minExponent, numBits));
484 			const tcu::Interval	backwardComponent		(convertFloatFlushToZeroRtn(addErrorUlp((float)functionValueBackward[c], -0.5f, numVaryingSampleBits), minExponent, numBits),
485 														 convertFloatFlushToZeroRtp(addErrorUlp((float)functionValueBackward[c], +0.5f, numVaryingSampleBits), minExponent, numBits));
486 			const int			maxValueExp				= de::max(de::max(tcu::Float32(forwardComponent.lo()).exponent(),   tcu::Float32(forwardComponent.hi()).exponent()),
487 																  de::max(tcu::Float32(backwardComponent.lo()).exponent(),  tcu::Float32(backwardComponent.hi()).exponent()));
488 
489 			// subtraction in numerator will likely cause a cancellation of the most
490 			// significant bits. Apply error bounds.
491 
492 			const tcu::Interval	numerator				(forwardComponent - backwardComponent);
493 			const int			numeratorLoExp			= tcu::Float32(numerator.lo()).exponent();
494 			const int			numeratorHiExp			= tcu::Float32(numerator.hi()).exponent();
495 			const int			numeratorLoBitsLost		= de::max(0, maxValueExp - numeratorLoExp); //!< must clamp to zero since if forward and backward components have different
496 			const int			numeratorHiBitsLost		= de::max(0, maxValueExp - numeratorHiExp); //!< sign, numerator might have larger exponent than its operands.
497 			const int			numeratorLoBits			= de::max(0, numBits - numeratorLoBitsLost);
498 			const int			numeratorHiBits			= de::max(0, numBits - numeratorHiBitsLost);
499 
500 			const tcu::Interval	numeratorRange			(convertFloatFlushToZeroRtn((float)numerator.lo(), minExponent, numeratorLoBits),
501 														 convertFloatFlushToZeroRtp((float)numerator.hi(), minExponent, numeratorHiBits));
502 
503 			const tcu::Interval	divisionRange			= numeratorRange / 3.0f; // legal sample area is anywhere within this and neighboring pixels (i.e. size = 3)
504 			const tcu::Interval	divisionResultRange		(convertFloatFlushToZeroRtn(addErrorUlp((float)divisionRange.lo(), -divisionErrorUlps, numBits), minExponent, numBits),
505 														 convertFloatFlushToZeroRtp(addErrorUlp((float)divisionRange.hi(), +divisionErrorUlps, numBits), minExponent, numBits));
506 			const tcu::Interval	finalResultRange		(divisionResultRange.lo() - surfaceThreshold[c], divisionResultRange.hi() + surfaceThreshold[c]);
507 
508 			if (resultDerivative[c] >= finalResultRange.lo() && resultDerivative[c] <= finalResultRange.hi())
509 			{
510 				// value ok
511 			}
512 			else
513 			{
514 				if (numFailedPixels < MAX_FAILED_MESSAGES)
515 					log << tcu::TestLog::Message
516 						<< "Error in pixel at " << x << ", " << y << " with component " << c << " (channel " << ("rgba"[c]) << ")\n"
517 						<< "\tGot pixel value " << result.getPixelInt(x, y) << "\n"
518 						<< "\t\tdFd" << ((isDfdxFunc(derivateFunc)) ? ('x') : ('y')) << " ~= " << resultDerivative[c] << "\n"
519 						<< "\t\tdifference to a valid range: "
520 							<< ((resultDerivative[c] < finalResultRange.lo()) ? ("-") : ("+"))
521 							<< ((resultDerivative[c] < finalResultRange.lo()) ? (finalResultRange.lo() - resultDerivative[c]) : (resultDerivative[c] - finalResultRange.hi()))
522 							<< "\n"
523 						<< "\tDerivative value range:\n"
524 						<< "\t\tMin: " << finalResultRange.lo() << "\n"
525 						<< "\t\tMax: " << finalResultRange.hi() << "\n"
526 						<< tcu::TestLog::EndMessage;
527 
528 				++numFailedPixels;
529 				anyComponentFailed = true;
530 			}
531 		}
532 
533 		if (anyComponentFailed)
534 			errorMask.setPixel(red, x, y);
535 	}
536 
537 	if (numFailedPixels >= MAX_FAILED_MESSAGES)
538 		log << TestLog::Message << "..." << TestLog::EndMessage;
539 
540 	if (numFailedPixels > 0)
541 		log << TestLog::Message << "FAIL: found " << numFailedPixels << " failed pixels" << TestLog::EndMessage;
542 
543 	return numFailedPixels == 0;
544 }
545 
546 // TestCase utils
547 
548 struct DerivateCaseDefinition
549 {
DerivateCaseDefinitionvkt::sr::__anona541e7d70111::DerivateCaseDefinition550 	DerivateCaseDefinition (void)
551 	{
552 		func					= DERIVATE_LAST;
553 		dataType				= glu::TYPE_LAST;
554 		precision				= glu::PRECISION_LAST;
555 		inNonUniformControlFlow	= false;
556 		coordDataType			= glu::TYPE_LAST;
557 		coordPrecision			= glu::PRECISION_LAST;
558 		surfaceType				= SURFACETYPE_UNORM_FBO;
559 		numSamples				= 0;
560 	}
561 
562 	DerivateFunc			func;
563 	glu::DataType			dataType;
564 	glu::Precision			precision;
565 	bool					inNonUniformControlFlow;
566 
567 	glu::DataType			coordDataType;
568 	glu::Precision			coordPrecision;
569 
570 	SurfaceType				surfaceType;
571 	int						numSamples;
572 };
573 
574 struct DerivateCaseValues
575 {
576 	tcu::Vec4	coordMin;
577 	tcu::Vec4	coordMax;
578 	tcu::Vec4	derivScale;
579 	tcu::Vec4	derivBias;
580 };
581 
582 struct TextureCaseValues
583 {
584 	tcu::Vec4	texValueMin;
585 	tcu::Vec4	texValueMax;
586 };
587 
588 class DerivateUniformSetup : public UniformSetup
589 {
590 public:
591 						DerivateUniformSetup		(bool useSampler);
592 	virtual				~DerivateUniformSetup		(void);
593 
594 	virtual void		setup						(ShaderRenderCaseInstance& instance, const tcu::Vec4&) const;
595 
596 private:
597 	const bool			m_useSampler;
598 };
599 
DerivateUniformSetup(bool useSampler)600 DerivateUniformSetup::DerivateUniformSetup (bool useSampler)
601 	: m_useSampler(useSampler)
602 {
603 }
604 
~DerivateUniformSetup(void)605 DerivateUniformSetup::~DerivateUniformSetup (void)
606 {
607 }
608 
609 // TriangleDerivateCaseInstance
610 
611 class TriangleDerivateCaseInstance : public ShaderRenderCaseInstance
612 {
613 public:
614 									TriangleDerivateCaseInstance	(Context&						context,
615 																	 const UniformSetup&			uniformSetup,
616 																	 const DerivateCaseDefinition&	definitions,
617 																	 const DerivateCaseValues&		values);
618 	virtual							~TriangleDerivateCaseInstance	(void);
619 	virtual tcu::TestStatus			iterate							(void);
getDerivateCaseDefinition(void)620 	DerivateCaseDefinition			getDerivateCaseDefinition		(void) { return m_definitions; }
getDerivateCaseValues(void)621 	DerivateCaseValues				getDerivateCaseValues			(void) { return m_values; }
622 
623 protected:
624 	virtual bool					verify							(const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask) = 0;
625 	tcu::Vec4						getSurfaceThreshold				(void) const;
626 	virtual void					setupDefaultInputs				(void);
627 
628 	const DerivateCaseDefinition&	m_definitions;
629 	const DerivateCaseValues&		m_values;
630 };
631 
getVkSampleCount(int numSamples)632 static VkSampleCountFlagBits getVkSampleCount (int numSamples)
633 {
634 	switch (numSamples)
635 	{
636 		case 0:		return VK_SAMPLE_COUNT_1_BIT;
637 		case 2:		return VK_SAMPLE_COUNT_2_BIT;
638 		case 4:		return VK_SAMPLE_COUNT_4_BIT;
639 		default:
640 			DE_ASSERT(false);
641 			return (VkSampleCountFlagBits)0;
642 	}
643 }
644 
TriangleDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)645 TriangleDerivateCaseInstance::TriangleDerivateCaseInstance (Context&						context,
646 															const UniformSetup&				uniformSetup,
647 															const DerivateCaseDefinition&	definitions,
648 															const DerivateCaseValues&		values)
649 	: ShaderRenderCaseInstance	(context, true, DE_NULL, uniformSetup, DE_NULL)
650 	, m_definitions				(definitions)
651 	, m_values					(values)
652 {
653 	m_renderSize	= tcu::UVec2(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
654 	m_colorFormat	= vk::mapTextureFormat(glu::mapGLInternalFormat(m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO ? GL_RGBA32UI : GL_RGBA8));
655 
656 	setSampleCount(getVkSampleCount(definitions.numSamples));
657 }
658 
~TriangleDerivateCaseInstance(void)659 TriangleDerivateCaseInstance::~TriangleDerivateCaseInstance (void)
660 {
661 }
662 
getSurfaceThreshold(void) const663 tcu::Vec4 TriangleDerivateCaseInstance::getSurfaceThreshold (void) const
664 {
665 	switch (m_definitions.surfaceType)
666 	{
667 		case SURFACETYPE_UNORM_FBO:				return tcu::IVec4(1).asFloat() / 255.0f;
668 		case SURFACETYPE_FLOAT_FBO:				return tcu::Vec4(0.0f);
669 		default:
670 			DE_ASSERT(false);
671 			return tcu::Vec4(0.0f);
672 	}
673 }
674 
setupDefaultInputs(void)675 void TriangleDerivateCaseInstance::setupDefaultInputs (void)
676 {
677 	const int		numVertices			= 4;
678 	const float		positions[]			=
679 	{
680 		-1.0f, -1.0f, 0.0f, 1.0f,
681 		-1.0f,  1.0f, 0.0f, 1.0f,
682 		1.0f, -1.0f, 0.0f, 1.0f,
683 		1.0f,  1.0f, 0.0f, 1.0f
684 	};
685 	const float		coords[]			=
686 	{
687 		m_values.coordMin.x(), m_values.coordMin.y(), m_values.coordMin.z(),								m_values.coordMax.w(),
688 		m_values.coordMin.x(), m_values.coordMax.y(), (m_values.coordMin.z()+m_values.coordMax.z())*0.5f,	(m_values.coordMin.w()+m_values.coordMax.w())*0.5f,
689 		m_values.coordMax.x(), m_values.coordMin.y(), (m_values.coordMin.z()+m_values.coordMax.z())*0.5f,	(m_values.coordMin.w()+m_values.coordMax.w())*0.5f,
690 		m_values.coordMax.x(), m_values.coordMax.y(), m_values.coordMax.z(),								m_values.coordMin.w()
691 	};
692 
693 	addAttribute(0u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (deUint32)sizeof(float), numVertices, positions);
694 	if (m_definitions.coordDataType != glu::TYPE_LAST)
695 		addAttribute(1u, vk::VK_FORMAT_R32G32B32A32_SFLOAT, 4 * (deUint32)sizeof(float), numVertices, coords);
696 }
697 
iterate(void)698 tcu::TestStatus TriangleDerivateCaseInstance::iterate (void)
699 {
700 	tcu::TestLog&				log				= m_context.getTestContext().getLog();
701 	const deUint32				numVertices		= 4;
702 	const deUint32				numTriangles	= 2;
703 	const deUint16				indices[]		= { 0, 2, 1, 2, 3, 1 };
704 	tcu::TextureLevel			resultImage;
705 
706 	setup();
707 
708 	render(numVertices, numTriangles, indices);
709 
710 	{
711 		const tcu::TextureLevel&		renderedImage	= getResultImage();
712 
713 		if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
714 		{
715 			const tcu::TextureFormat	dataFormat		(tcu::TextureFormat::RGBA, tcu::TextureFormat::FLOAT);
716 
717 			resultImage.setStorage(dataFormat, renderedImage.getWidth(), renderedImage.getHeight());
718 			tcu::copy(resultImage.getAccess(), tcu::ConstPixelBufferAccess(dataFormat, renderedImage.getSize(), renderedImage.getAccess().getDataPtr()));
719 		}
720 		else
721 		{
722 			resultImage = renderedImage;
723 		}
724 	}
725 
726 	// Verify
727 	{
728 		tcu::Surface errorMask(resultImage.getWidth(), resultImage.getHeight());
729 		tcu::clear(errorMask.getAccess(), tcu::RGBA::green().toVec());
730 
731 		const bool isOk = verify(resultImage.getAccess(), errorMask.getAccess());
732 
733 		log << TestLog::ImageSet("Result", "Result images")
734 			<< TestLog::Image("Rendered", "Rendered image", resultImage);
735 
736 		if (!isOk)
737 			log << TestLog::Image("ErrorMask", "Error mask", errorMask);
738 
739 		log << TestLog::EndImageSet;
740 
741 		if (isOk)
742 			return tcu::TestStatus::pass("Pass");
743 		else
744 			return tcu::TestStatus::fail("Image comparison failed");
745 	}
746 }
747 
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 &) const748 void DerivateUniformSetup::setup (ShaderRenderCaseInstance& instance, const tcu::Vec4&) const
749 {
750 	DerivateCaseDefinition	definitions		= dynamic_cast<TriangleDerivateCaseInstance&>(instance).getDerivateCaseDefinition();
751 	DerivateCaseValues		values			= dynamic_cast<TriangleDerivateCaseInstance&>(instance).getDerivateCaseValues();
752 
753 	DE_ASSERT(glu::isDataTypeFloatOrVec(definitions.dataType));
754 
755 	instance.addUniform(0u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivScale.getPtr());
756 	instance.addUniform(1u, vk::VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, glu::getDataTypeScalarSize(definitions.dataType) * sizeof(float), values.derivBias.getPtr());
757 
758 	if (m_useSampler)
759 		instance.useSampler(2u, 0u); // To the uniform binding location 2 bind the texture 0
760 }
761 
762 // TriangleDerivateCase
763 
764 class TriangleDerivateCase : public ShaderRenderCase
765 {
766 public:
767 									TriangleDerivateCase	(tcu::TestContext&		testCtx,
768 															 const std::string&		name,
769 															 const UniformSetup*	uniformSetup);
770 	virtual							~TriangleDerivateCase	(void);
771 
772 	void							checkSupport			(Context& context) const override;
773 
774 protected:
775 	DerivateCaseDefinition	m_definitions;
776 	DerivateCaseValues		m_values;
777 };
778 
TriangleDerivateCase(tcu::TestContext & testCtx,const std::string & name,const UniformSetup * uniformSetup)779 TriangleDerivateCase::TriangleDerivateCase (tcu::TestContext&		testCtx,
780 											const std::string&		name,
781 											const UniformSetup*		uniformSetup)
782 	: ShaderRenderCase		(testCtx, name, false, (ShaderEvaluator*)DE_NULL, uniformSetup, DE_NULL)
783 	, m_definitions			()
784 {
785 }
786 
~TriangleDerivateCase(void)787 TriangleDerivateCase::~TriangleDerivateCase (void)
788 {
789 }
790 
checkSupport(Context & context) const791 void TriangleDerivateCase::checkSupport (Context& context) const
792 {
793 	ShaderRenderCase::checkSupport(context);
794 
795 	const bool subgroupFunc = isSubgroupFunc(m_definitions.func);
796 
797 	if (m_definitions.inNonUniformControlFlow || subgroupFunc)
798 	{
799 		const std::string errorPrefix	= m_definitions.inNonUniformControlFlow
800 										? "Derivatives in dynamic control flow"
801 										: "Manual derivatives with subgroup operations";
802 
803 		if (!context.contextSupports(vk::ApiVersion(0, 1, 1, 0)))
804 			throw tcu::NotSupportedError(errorPrefix + " require Vulkan 1.1");
805 
806 		const auto& subgroupProperties = context.getSubgroupProperties();
807 
808 		if (subgroupProperties.subgroupSize < 4)
809 			throw tcu::NotSupportedError(errorPrefix + " require subgroupSize >= 4");
810 
811 		if ((subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_BALLOT_BIT) == 0)
812 			throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_BALLOT_BIT");
813 
814 		if ((subgroupProperties.supportedStages & VK_SHADER_STAGE_FRAGMENT_BIT) == 0)
815 			throw tcu::NotSupportedError(errorPrefix + " tests require subgroup supported stage including VK_SHADER_STAGE_FRAGMENT_BIT");
816 
817 		if (subgroupFunc && (subgroupProperties.supportedOperations & VK_SUBGROUP_FEATURE_QUAD_BIT) == 0)
818 			throw tcu::NotSupportedError(errorPrefix + " tests require VK_SUBGROUP_FEATURE_QUAD_BIT");
819 	}
820 }
821 
genVertexSource(glu::DataType coordType,glu::Precision precision)822 static std::string genVertexSource (glu::DataType coordType, glu::Precision precision)
823 {
824 	DE_ASSERT(coordType == glu::TYPE_LAST || glu::isDataTypeFloatOrVec(coordType));
825 
826 	const std::string vertexTmpl =
827 		"#version 450\n"
828 		"layout(location = 0) in highp vec4 a_position;\n"
829 		+ string(coordType != glu::TYPE_LAST ? "layout(location = 1) in ${PRECISION} ${DATATYPE} a_coord;\n"
830 											   "layout(location = 0) out ${PRECISION} ${DATATYPE} v_coord;\n" : "") +
831 		"out gl_PerVertex {\n"
832 		"	vec4 gl_Position;\n"
833 		"};\n"
834 		"void main (void)\n"
835 		"{\n"
836 		"	gl_Position = a_position;\n"
837 		+ string(coordType != glu::TYPE_LAST ? "	v_coord = a_coord;\n" : "") +
838 		"}\n";
839 
840 	map<string, string> vertexParams;
841 
842 	if (coordType != glu::TYPE_LAST)
843 	{
844 		vertexParams["PRECISION"]	= glu::getPrecisionName(precision);
845 		vertexParams["DATATYPE"]	= glu::getDataTypeName(coordType);
846 	}
847 
848 	return tcu::StringTemplate(vertexTmpl).specialize(vertexParams);
849 }
850 
851 // ConstantDerivateCaseInstance
852 
853 class ConstantDerivateCaseInstance : public TriangleDerivateCaseInstance
854 {
855 public:
856 								ConstantDerivateCaseInstance	(Context&						context,
857 																 const UniformSetup&			uniformSetup,
858 																 const DerivateCaseDefinition&	definitions,
859 																 const DerivateCaseValues&		values);
860 	virtual						~ConstantDerivateCaseInstance	(void);
861 
862 	virtual bool				verify							(const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask);
863 };
864 
ConstantDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)865 ConstantDerivateCaseInstance::ConstantDerivateCaseInstance (Context&							context,
866 															const UniformSetup&					uniformSetup,
867 															const DerivateCaseDefinition&		definitions,
868 															const DerivateCaseValues&			values)
869 	: TriangleDerivateCaseInstance	(context, uniformSetup, definitions, values)
870 {
871 }
872 
~ConstantDerivateCaseInstance(void)873 ConstantDerivateCaseInstance::~ConstantDerivateCaseInstance (void)
874 {
875 }
876 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)877 bool ConstantDerivateCaseInstance::verify (const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask)
878 {
879 	const tcu::Vec4 reference	(0.0f); // Derivate of constant argument should always be 0
880 	const tcu::Vec4	threshold	= getSurfaceThreshold() / abs(m_values.derivScale);
881 
882 	return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
883 								  reference, threshold, m_values.derivScale, m_values.derivBias);
884 }
885 
886 // ConstantDerivateCase
887 
888 class ConstantDerivateCase : public TriangleDerivateCase
889 {
890 public:
891 							ConstantDerivateCase		(tcu::TestContext&		testCtx,
892 														 const std::string&		name,
893 														 DerivateFunc			func,
894 														 glu::DataType			type);
895 	virtual					~ConstantDerivateCase		(void);
896 
897 	virtual	void			initPrograms				(vk::SourceCollections& programCollection) const;
898 	virtual TestInstance*	createInstance				(Context& context) const;
899 };
900 
ConstantDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type)901 ConstantDerivateCase::ConstantDerivateCase (tcu::TestContext&		testCtx,
902 											const std::string&		name,
903 											DerivateFunc			func,
904 											glu::DataType			type)
905 	: TriangleDerivateCase	(testCtx, name, new DerivateUniformSetup(false))
906 {
907 	m_definitions.func				= func;
908 	m_definitions.dataType			= type;
909 	m_definitions.precision			= glu::PRECISION_HIGHP;
910 
911 	m_values.derivScale		= tcu::Vec4(1e3f, 1e3f, 1e3f, 1e3f);
912 	m_values.derivBias		= tcu::Vec4(0.5f, 0.5f, 0.5f, 0.5f);
913 }
914 
~ConstantDerivateCase(void)915 ConstantDerivateCase::~ConstantDerivateCase (void)
916 {
917 }
918 
createInstance(Context & context) const919 TestInstance* ConstantDerivateCase::createInstance (Context& context) const
920 {
921 	DE_ASSERT(m_uniformSetup != DE_NULL);
922 	return new ConstantDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
923 }
924 
initPrograms(vk::SourceCollections & programCollection) const925 void ConstantDerivateCase::initPrograms (vk::SourceCollections& programCollection) const
926 {
927 	const char* fragmentTmpl =
928 		"#version 450\n"
929 		"layout(location = 0) out mediump vec4 o_color;\n"
930 		"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
931 		"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; }; \n"
932 		"void main (void)\n"
933 		"{\n"
934 		"	${PRECISION} ${DATATYPE} res = ${FUNC}(${VALUE}) * u_scale + u_bias;\n"
935 		"	o_color = ${CAST_TO_OUTPUT};\n"
936 		"}\n";
937 
938 	map<string, string> fragmentParams;
939 	fragmentParams["PRECISION"]			= glu::getPrecisionName(m_definitions.precision);
940 	fragmentParams["DATATYPE"]			= glu::getDataTypeName(m_definitions.dataType);
941 	fragmentParams["FUNC"]				= getDerivateFuncName(m_definitions.func);
942 	fragmentParams["VALUE"]				= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "vec4(1.0, 7.2, -1e5, 0.0)" :
943 										  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec3(1e2, 8.0, 0.01)" :
944 										  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec2(-0.0, 2.7)" :
945 										  /* TYPE_FLOAT */								   "7.7";
946 	fragmentParams["CAST_TO_OUTPUT"]	= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
947 										  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
948 										  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
949 										  /* TYPE_FLOAT */								   "vec4(res, 0.0, 0.0, 1.0)";
950 
951 	std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
952 	programCollection.glslSources.add("vert") << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
953 	programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
954 }
955 
956 // Linear cases
957 
958 class LinearDerivateUniformSetup : public DerivateUniformSetup
959 {
960 public:
961 					LinearDerivateUniformSetup		(bool useSampler, BaseUniformType usedDefaultUniform);
962 	virtual			~LinearDerivateUniformSetup		(void);
963 
964 	virtual void	setup							(ShaderRenderCaseInstance& instance, const tcu::Vec4& constCoords) const;
965 
966 private:
967 	const BaseUniformType	m_usedDefaultUniform;
968 };
969 
LinearDerivateUniformSetup(bool useSampler,BaseUniformType usedDefaultUniform)970 LinearDerivateUniformSetup::LinearDerivateUniformSetup (bool useSampler, BaseUniformType usedDefaultUniform)
971 	: DerivateUniformSetup	(useSampler)
972 	, m_usedDefaultUniform	(usedDefaultUniform)
973 {
974 }
975 
~LinearDerivateUniformSetup(void)976 LinearDerivateUniformSetup::~LinearDerivateUniformSetup (void)
977 {
978 }
979 
setup(ShaderRenderCaseInstance & instance,const tcu::Vec4 & constCoords) const980 void LinearDerivateUniformSetup::setup (ShaderRenderCaseInstance& instance, const tcu::Vec4& constCoords) const
981 {
982 	DerivateUniformSetup::setup(instance, constCoords);
983 
984 	if (m_usedDefaultUniform != U_LAST)
985 		switch (m_usedDefaultUniform)
986 		{
987 			case UB_TRUE:
988 			case UI_ONE:
989 			case UI_TWO:
990 				instance.useUniform(2u, m_usedDefaultUniform);
991 				break;
992 			default:
993 				DE_ASSERT(false);
994 				break;
995 		}
996 }
997 
998 class LinearDerivateCaseInstance : public TriangleDerivateCaseInstance
999 {
1000 public:
1001 								LinearDerivateCaseInstance	(Context&						context,
1002 															 const UniformSetup&			uniformSetup,
1003 															 const DerivateCaseDefinition&	definitions,
1004 															 const DerivateCaseValues&		values);
1005 	virtual						~LinearDerivateCaseInstance	(void);
1006 
1007 	virtual bool				verify						(const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask);
1008 };
1009 
LinearDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values)1010 LinearDerivateCaseInstance::LinearDerivateCaseInstance (Context&						context,
1011 														const UniformSetup&				uniformSetup,
1012 														const DerivateCaseDefinition&	definitions,
1013 														const DerivateCaseValues&		values)
1014 	: TriangleDerivateCaseInstance	(context, uniformSetup, definitions, values)
1015 {
1016 }
1017 
~LinearDerivateCaseInstance(void)1018 LinearDerivateCaseInstance::~LinearDerivateCaseInstance (void)
1019 {
1020 }
1021 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1022 bool LinearDerivateCaseInstance::verify (const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask)
1023 {
1024 	const tcu::Vec4		xScale				= tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1025 	const tcu::Vec4		yScale				= tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1026 	const tcu::Vec4		surfaceThreshold	= getSurfaceThreshold() / abs(m_values.derivScale);
1027 
1028 	if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1029 	{
1030 		const bool			isX			= isDfdxFunc(m_definitions.func);
1031 		const float			div			= isX ? float(result.getWidth()) : float(result.getHeight());
1032 		const tcu::Vec4		scale		= isX ? xScale : yScale;
1033 		tcu::Vec4			reference	= ((m_values.coordMax - m_values.coordMin) / div);
1034 		const tcu::Vec4		opThreshold	= getDerivateThreshold(m_definitions.precision, m_values.coordMin, m_values.coordMax, reference);
1035 		const tcu::Vec4		threshold	= max(surfaceThreshold, opThreshold);
1036 		const int			numComps	= glu::getDataTypeFloatScalars(m_definitions.dataType);
1037 
1038 		/* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1039 		reference = reference * scale;
1040 
1041 		m_context.getTestContext().getLog()
1042 			<< tcu::TestLog::Message
1043 			<< "Verifying result image.\n"
1044 			<< "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold " << LogVecComps(threshold, numComps)
1045 			<< tcu::TestLog::EndMessage;
1046 
1047 		// short circuit if result is strictly within the normal value error bounds.
1048 		// This improves performance significantly.
1049 		if (verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1050 								   reference, threshold, m_values.derivScale, m_values.derivBias,
1051 								   LOG_NOTHING))
1052 		{
1053 			m_context.getTestContext().getLog()
1054 				<< tcu::TestLog::Message
1055 				<< "No incorrect derivatives found, result valid."
1056 				<< tcu::TestLog::EndMessage;
1057 
1058 			return true;
1059 		}
1060 
1061 		// some pixels exceed error bounds calculated for normal values. Verify that these
1062 		// potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1063 
1064 		m_context.getTestContext().getLog()
1065 			<< tcu::TestLog::Message
1066 			<< "Initial verification failed, verifying image by calculating accurate error bounds for each result pixel.\n"
1067 			<< "\tVerifying each result derivative is within its range of legal result values."
1068 			<< tcu::TestLog::EndMessage;
1069 
1070 		{
1071 			const tcu::UVec2			viewportSize	(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1072 			const float					w				= float(viewportSize.x());
1073 			const float					h				= float(viewportSize.y());
1074 			const tcu::Vec4				valueRamp		= (m_values.coordMax - m_values.coordMin);
1075 			Linear2DFunctionEvaluator	function;
1076 
1077 			function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_values.coordMin.x()));
1078 			function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_values.coordMin.y()));
1079 			function.matrix.setRow(2, tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h, m_values.coordMin.z() + m_values.coordMin.z()) / 2.0f);
1080 			function.matrix.setRow(3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h, m_values.coordMax.w() + m_values.coordMax.w()) / 2.0f);
1081 
1082 			return reverifyConstantDerivateWithFlushRelaxations(m_context.getTestContext().getLog(), result, errorMask,
1083 																m_definitions.dataType, m_definitions.precision, m_values.derivScale,
1084 																m_values.derivBias, surfaceThreshold, m_definitions.func,
1085 																function);
1086 		}
1087 	}
1088 	else
1089 	{
1090 		DE_ASSERT(isFwidthFunc(m_definitions.func));
1091 		const float			w			= float(result.getWidth());
1092 		const float			h			= float(result.getHeight());
1093 
1094 		const tcu::Vec4		dx			= ((m_values.coordMax - m_values.coordMin) / w) * xScale;
1095 		const tcu::Vec4		dy			= ((m_values.coordMax - m_values.coordMin) / h) * yScale;
1096 		const tcu::Vec4		reference	= tcu::abs(dx) + tcu::abs(dy);
1097 		const tcu::Vec4		dxThreshold	= getDerivateThreshold(m_definitions.precision, m_values.coordMin*xScale, m_values.coordMax*xScale, dx);
1098 		const tcu::Vec4		dyThreshold	= getDerivateThreshold(m_definitions.precision, m_values.coordMin*yScale, m_values.coordMax*yScale, dy);
1099 		const tcu::Vec4		threshold	= max(surfaceThreshold, max(dxThreshold, dyThreshold));
1100 
1101 		return verifyConstantDerivate(m_context.getTestContext().getLog(), result, errorMask, m_definitions.dataType,
1102 									  reference, threshold, m_values.derivScale, m_values.derivBias);
1103 	}
1104 }
1105 
1106 // LinearDerivateCase
1107 
1108 class LinearDerivateCase : public TriangleDerivateCase
1109 {
1110 public:
1111 							LinearDerivateCase			(tcu::TestContext&		testCtx,
1112 														 const std::string&		name,
1113 														 DerivateFunc			func,
1114 														 glu::DataType			type,
1115 														 glu::Precision			precision,
1116 														 bool					inNonUniformControlFlow,
1117 														 SurfaceType			surfaceType,
1118 														 int					numSamples,
1119 														 const std::string&		fragmentSrcTmpl,
1120 														 BaseUniformType		usedDefaultUniform);
1121 	virtual					~LinearDerivateCase			(void);
1122 
1123 	virtual	void			initPrograms				(vk::SourceCollections& programCollection) const;
1124 	virtual TestInstance*	createInstance				(Context& context) const;
1125 
1126 private:
1127 	const std::string		m_fragmentTmpl;
1128 };
1129 
LinearDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,bool inNonUniformControlFlow,SurfaceType surfaceType,int numSamples,const std::string & fragmentSrcTmpl,BaseUniformType usedDefaultUniform)1130 LinearDerivateCase::LinearDerivateCase (tcu::TestContext&		testCtx,
1131 										const std::string&		name,
1132 										DerivateFunc			func,
1133 										glu::DataType			type,
1134 										glu::Precision			precision,
1135 										bool					inNonUniformControlFlow,
1136 										SurfaceType				surfaceType,
1137 										int						numSamples,
1138 										const std::string&		fragmentSrcTmpl,
1139 										BaseUniformType			usedDefaultUniform)
1140 	: TriangleDerivateCase	(testCtx, name, new LinearDerivateUniformSetup(false, usedDefaultUniform))
1141 	, m_fragmentTmpl		(fragmentSrcTmpl)
1142 {
1143 	m_definitions.func						= func;
1144 	m_definitions.dataType					= type;
1145 	m_definitions.precision					= precision;
1146 	m_definitions.inNonUniformControlFlow	= inNonUniformControlFlow;
1147 	m_definitions.coordDataType				= m_definitions.dataType;
1148 	m_definitions.coordPrecision			= m_definitions.precision;
1149 	m_definitions.surfaceType				= surfaceType;
1150 	m_definitions.numSamples				= numSamples;
1151 
1152 	const tcu::UVec2	viewportSize	(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1153 	const float			w				= float(viewportSize.x());
1154 	const float			h				= float(viewportSize.y());
1155 
1156 	switch (m_definitions.precision)
1157 	{
1158 		case glu::PRECISION_HIGHP:
1159 			m_values.coordMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1160 			m_values.coordMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1161 			break;
1162 
1163 		case glu::PRECISION_MEDIUMP:
1164 			m_values.coordMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1165 			m_values.coordMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1166 			break;
1167 
1168 		case glu::PRECISION_LOWP:
1169 			m_values.coordMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1170 			m_values.coordMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1171 			break;
1172 
1173 		default:
1174 			DE_ASSERT(false);
1175 	}
1176 
1177 	if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1178 	{
1179 		// No scale or bias used for accuracy.
1180 		m_values.derivScale	= tcu::Vec4(1.0f);
1181 		m_values.derivBias	= tcu::Vec4(0.0f);
1182 	}
1183 	else
1184 	{
1185 		// Compute scale - bias that normalizes to 0..1 range.
1186 		const tcu::Vec4 dx = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(w, w, w*0.5f, -w*0.5f);
1187 		const tcu::Vec4 dy = (m_values.coordMax - m_values.coordMin) / tcu::Vec4(h, h, h*0.5f, -h*0.5f);
1188 
1189 		if (isDfdxFunc(m_definitions.func))
1190 			m_values.derivScale = 0.5f / dx;
1191 		else if (isDfdyFunc(m_definitions.func))
1192 			m_values.derivScale = 0.5f / dy;
1193 		else if (isFwidthFunc(m_definitions.func))
1194 			m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1195 		else
1196 			DE_ASSERT(false);
1197 
1198 		m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1199 	}
1200 }
1201 
~LinearDerivateCase(void)1202 LinearDerivateCase::~LinearDerivateCase (void)
1203 {
1204 }
1205 
createInstance(Context & context) const1206 TestInstance* LinearDerivateCase::createInstance (Context& context) const
1207 {
1208 	DE_ASSERT(m_uniformSetup != DE_NULL);
1209 	return new LinearDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values);
1210 }
1211 
initPrograms(vk::SourceCollections & programCollection) const1212 void LinearDerivateCase::initPrograms (vk::SourceCollections& programCollection) const
1213 {
1214 	const SpirvVersion				spirvVersion = (m_definitions.inNonUniformControlFlow || isSubgroupFunc(m_definitions.func)) ? vk::SPIRV_VERSION_1_3 : vk::SPIRV_VERSION_1_0;
1215 	const vk::ShaderBuildOptions	buildOptions(programCollection.usedVulkanVersion, spirvVersion, 0u);
1216 
1217 	const bool			packToInt		= m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1218 	map<string, string>	fragmentParams;
1219 
1220 	fragmentParams["OUTPUT_TYPE"]		= glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1221 	fragmentParams["OUTPUT_PREC"]		= glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1222 	fragmentParams["PRECISION"]			= glu::getPrecisionName(m_definitions.precision);
1223 	fragmentParams["DATATYPE"]			= glu::getDataTypeName(m_definitions.dataType);
1224 	fragmentParams["FUNC"]				= getDerivateFuncName(m_definitions.func);
1225 
1226 	if (packToInt)
1227 	{
1228 		fragmentParams["CAST_TO_OUTPUT"]	= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "floatBitsToUint(res)" :
1229 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "floatBitsToUint(vec4(res, 1.0))" :
1230 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1231 											  /* TYPE_FLOAT */								   "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1232 	}
1233 	else
1234 	{
1235 		fragmentParams["CAST_TO_OUTPUT"]	= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1236 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1237 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1238 											  /* TYPE_FLOAT */								   "vec4(res, 0.0, 0.0, 1.0)";
1239 	}
1240 
1241 	std::string fragmentSrc = tcu::StringTemplate(m_fragmentTmpl).specialize(fragmentParams);
1242 	programCollection.glslSources.add("vert") << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1243 	programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc) << buildOptions;
1244 }
1245 
1246 // TextureDerivateCaseInstance
1247 
1248 class TextureDerivateCaseInstance : public TriangleDerivateCaseInstance
1249 {
1250 public:
1251 								TextureDerivateCaseInstance		(Context&							context,
1252 																 const UniformSetup&				uniformSetup,
1253 																 const DerivateCaseDefinition&		definitions,
1254 																 const DerivateCaseValues&			values,
1255 																 const TextureCaseValues&			textureValues);
1256 	virtual						~TextureDerivateCaseInstance	(void);
1257 
1258 	virtual bool				verify							(const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask);
1259 
1260 private:
1261 	const TextureCaseValues&	m_textureValues;
1262 };
1263 
TextureDerivateCaseInstance(Context & context,const UniformSetup & uniformSetup,const DerivateCaseDefinition & definitions,const DerivateCaseValues & values,const TextureCaseValues & textureValues)1264 TextureDerivateCaseInstance::TextureDerivateCaseInstance (Context&							context,
1265 														  const UniformSetup&				uniformSetup,
1266 														  const DerivateCaseDefinition&		definitions,
1267 														  const DerivateCaseValues&			values,
1268 														  const TextureCaseValues&			textureValues)
1269 	: TriangleDerivateCaseInstance	(context, uniformSetup, definitions, values)
1270 	, m_textureValues				(textureValues)
1271 {
1272 	de::MovePtr<tcu::Texture2D>		texture;
1273 
1274 	// Lowp and mediump cases use RGBA16F format, while highp uses RGBA32F.
1275 	{
1276 		const tcu::UVec2			viewportSize	(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1277 		const tcu::TextureFormat	format			= glu::mapGLInternalFormat(m_definitions.precision == glu::PRECISION_HIGHP ? GL_RGBA32F : GL_RGBA16F);
1278 
1279 		texture = de::MovePtr<tcu::Texture2D>(new tcu::Texture2D(format, viewportSize.x(), viewportSize.y()));
1280 		texture->allocLevel(0);
1281 	}
1282 
1283 	// Fill with gradients.
1284 	{
1285 		const tcu::PixelBufferAccess level0 = texture->getLevel(0);
1286 		for (int y = 0; y < level0.getHeight(); y++)
1287 		{
1288 			for (int x = 0; x < level0.getWidth(); x++)
1289 			{
1290 				const float		xf		= (float(x)+0.5f) / float(level0.getWidth());
1291 				const float		yf		= (float(y)+0.5f) / float(level0.getHeight());
1292 				const tcu::Vec4	s		= tcu::Vec4(xf, yf, (xf+yf)/2.0f, 1.0f - (xf+yf)/2.0f);
1293 
1294 				level0.setPixel(m_textureValues.texValueMin + (m_textureValues.texValueMax - m_textureValues.texValueMin)*s, x, y);
1295 			}
1296 		}
1297 	}
1298 
1299 	de::SharedPtr<TextureBinding>	testTexture		(new TextureBinding(texture.release(),
1300 																		tcu::Sampler(tcu::Sampler::CLAMP_TO_EDGE,
1301 																					 tcu::Sampler::CLAMP_TO_EDGE,
1302 																					 tcu::Sampler::CLAMP_TO_EDGE,
1303 																					 tcu::Sampler::NEAREST,
1304 																					 tcu::Sampler::NEAREST,
1305 																					 0.0f,
1306 																					 true,
1307 																					 tcu::Sampler::COMPAREMODE_NONE,
1308 																					 0,
1309 																					 tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f),
1310 																					 true)));
1311 	m_textures.push_back(testTexture);
1312 }
1313 
~TextureDerivateCaseInstance(void)1314 TextureDerivateCaseInstance::~TextureDerivateCaseInstance (void)
1315 {
1316 }
1317 
verify(const tcu::ConstPixelBufferAccess & result,const tcu::PixelBufferAccess & errorMask)1318 bool TextureDerivateCaseInstance::verify (const tcu::ConstPixelBufferAccess& result, const tcu::PixelBufferAccess& errorMask)
1319 {
1320 	// \note Edges are ignored in comparison
1321 	if (result.getWidth() < 2 || result.getHeight() < 2)
1322 		throw tcu::NotSupportedError("Too small viewport");
1323 
1324 	tcu::ConstPixelBufferAccess	compareArea			= tcu::getSubregion(result, 1, 1, result.getWidth()-2, result.getHeight()-2);
1325 	tcu::PixelBufferAccess		maskArea			= tcu::getSubregion(errorMask, 1, 1, errorMask.getWidth()-2, errorMask.getHeight()-2);
1326 	const tcu::Vec4				xScale				= tcu::Vec4(1.0f, 0.0f, 0.5f, -0.5f);
1327 	const tcu::Vec4				yScale				= tcu::Vec4(0.0f, 1.0f, 0.5f, -0.5f);
1328 	const float					w					= float(result.getWidth());
1329 	const float					h					= float(result.getHeight());
1330 
1331 	const tcu::Vec4				surfaceThreshold	= getSurfaceThreshold() / abs(m_values.derivScale);
1332 
1333 	if (isDfdxFunc(m_definitions.func) || isDfdyFunc(m_definitions.func))
1334 	{
1335 		const bool			isX			= isDfdxFunc(m_definitions.func);
1336 		const float			div			= isX ? w : h;
1337 		const tcu::Vec4		scale		= isX ? xScale : yScale;
1338 		tcu::Vec4			reference	= ((m_textureValues.texValueMax - m_textureValues.texValueMin) / div);
1339 		const tcu::Vec4		opThreshold	= getDerivateThreshold(m_definitions.precision, m_textureValues.texValueMin, m_textureValues.texValueMax, reference);
1340 		const tcu::Vec4		threshold	= max(surfaceThreshold, opThreshold);
1341 		const int			numComps	= glu::getDataTypeFloatScalars(m_definitions.dataType);
1342 
1343 		/* adjust the reference value for the correct dfdx or dfdy sample adjacency */
1344 		reference = reference * scale;
1345 
1346 		m_context.getTestContext().getLog()
1347 			<< tcu::TestLog::Message
1348 			<< "Verifying result image.\n"
1349 			<< "\tValid derivative is " << LogVecComps(reference, numComps) << " with threshold " << LogVecComps(threshold, numComps)
1350 			<< tcu::TestLog::EndMessage;
1351 
1352 		// short circuit if result is strictly within the normal value error bounds.
1353 		// This improves performance significantly.
1354 		if (verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1355 								   reference, threshold, m_values.derivScale, m_values.derivBias,
1356 								   LOG_NOTHING))
1357 		{
1358 			m_context.getTestContext().getLog()
1359 				<< tcu::TestLog::Message
1360 				<< "No incorrect derivatives found, result valid."
1361 				<< tcu::TestLog::EndMessage;
1362 
1363 			return true;
1364 		}
1365 
1366 		// some pixels exceed error bounds calculated for normal values. Verify that these
1367 		// potentially invalid pixels are in fact valid due to (for example) subnorm flushing.
1368 
1369 		m_context.getTestContext().getLog()
1370 			<< tcu::TestLog::Message
1371 			<< "Initial verification failed, verifying image by calculating accurate error bounds for each result pixel.\n"
1372 			<< "\tVerifying each result derivative is within its range of legal result values."
1373 			<< tcu::TestLog::EndMessage;
1374 
1375 		{
1376 			const tcu::Vec4				valueRamp		= (m_textureValues.texValueMax - m_textureValues.texValueMin);
1377 			Linear2DFunctionEvaluator	function;
1378 
1379 			function.matrix.setRow(0, tcu::Vec3(valueRamp.x() / w, 0.0f, m_textureValues.texValueMin.x()));
1380 			function.matrix.setRow(1, tcu::Vec3(0.0f, valueRamp.y() / h, m_textureValues.texValueMin.y()));
1381 			function.matrix.setRow(2, tcu::Vec3(valueRamp.z() / w, valueRamp.z() / h, m_textureValues.texValueMin.z() + m_textureValues.texValueMin.z()) / 2.0f);
1382 			function.matrix.setRow(3, tcu::Vec3(-valueRamp.w() / w, -valueRamp.w() / h, m_textureValues.texValueMax.w() + m_textureValues.texValueMax.w()) / 2.0f);
1383 
1384 			return reverifyConstantDerivateWithFlushRelaxations(m_context.getTestContext().getLog(), compareArea, maskArea,
1385 																m_definitions.dataType, m_definitions.precision, m_values.derivScale,
1386 																m_values.derivBias, surfaceThreshold, m_definitions.func,
1387 																function);
1388 		}
1389 	}
1390 	else
1391 	{
1392 		DE_ASSERT(isFwidthFunc(m_definitions.func));
1393 		const tcu::Vec4	dx			= ((m_textureValues.texValueMax - m_textureValues.texValueMin) / w) * xScale;
1394 		const tcu::Vec4	dy			= ((m_textureValues.texValueMax - m_textureValues.texValueMin) / h) * yScale;
1395 		const tcu::Vec4	reference	= tcu::abs(dx) + tcu::abs(dy);
1396 		const tcu::Vec4	dxThreshold	= getDerivateThreshold(m_definitions.precision, m_textureValues.texValueMin*xScale, m_textureValues.texValueMax*xScale, dx);
1397 		const tcu::Vec4	dyThreshold	= getDerivateThreshold(m_definitions.precision, m_textureValues.texValueMin*yScale, m_textureValues.texValueMax*yScale, dy);
1398 		const tcu::Vec4	threshold	= max(surfaceThreshold, max(dxThreshold, dyThreshold));
1399 
1400 		return verifyConstantDerivate(m_context.getTestContext().getLog(), compareArea, maskArea, m_definitions.dataType,
1401 									  reference, threshold, m_values.derivScale, m_values.derivBias);
1402 	}
1403 }
1404 
1405 // TextureDerivateCase
1406 
1407 class TextureDerivateCase : public TriangleDerivateCase
1408 {
1409 public:
1410 							TextureDerivateCase			(tcu::TestContext&		testCtx,
1411 														 const std::string&		name,
1412 														 DerivateFunc			func,
1413 														 glu::DataType			type,
1414 														 glu::Precision			precision,
1415 														 SurfaceType			surfaceType,
1416 														 int					numSamples);
1417 	virtual					~TextureDerivateCase		(void);
1418 
1419 	virtual	void			initPrograms				(vk::SourceCollections& programCollection) const;
1420 	virtual TestInstance*	createInstance				(Context& context) const;
1421 
1422 private:
1423 	TextureCaseValues		m_textureValues;
1424 };
1425 
TextureDerivateCase(tcu::TestContext & testCtx,const std::string & name,DerivateFunc func,glu::DataType type,glu::Precision precision,SurfaceType surfaceType,int numSamples)1426 TextureDerivateCase::TextureDerivateCase (tcu::TestContext&		testCtx,
1427 										  const std::string&	name,
1428 										  DerivateFunc			func,
1429 										  glu::DataType			type,
1430 										  glu::Precision		precision,
1431 										  SurfaceType			surfaceType,
1432 										  int					numSamples)
1433 	: TriangleDerivateCase	(testCtx, name, new DerivateUniformSetup(true))
1434 {
1435 	m_definitions.dataType			= type;
1436 	m_definitions.func				= func;
1437 	m_definitions.precision			= precision;
1438 	m_definitions.coordDataType		= glu::TYPE_FLOAT_VEC2;
1439 	m_definitions.coordPrecision	= glu::PRECISION_HIGHP;
1440 	m_definitions.surfaceType		= surfaceType;
1441 	m_definitions.numSamples		= numSamples;
1442 
1443 	// Texture size matches viewport and nearest sampling is used. Thus texture sampling
1444 	// is equal to just interpolating the texture value range.
1445 
1446 	// Determine value range for texture.
1447 
1448 	switch (m_definitions.precision)
1449 	{
1450 		case glu::PRECISION_HIGHP:
1451 			m_textureValues.texValueMin = tcu::Vec4(-97.f, 0.2f, 71.f, 74.f);
1452 			m_textureValues.texValueMax = tcu::Vec4(-13.2f, -77.f, 44.f, 76.f);
1453 			break;
1454 
1455 		case glu::PRECISION_MEDIUMP:
1456 			m_textureValues.texValueMin = tcu::Vec4(-37.0f, 47.f, -7.f, 0.0f);
1457 			m_textureValues.texValueMax = tcu::Vec4(-1.0f, 12.f, 7.f, 19.f);
1458 			break;
1459 
1460 		case glu::PRECISION_LOWP:
1461 			m_textureValues.texValueMin = tcu::Vec4(0.0f, -1.0f, 0.0f, 1.0f);
1462 			m_textureValues.texValueMax = tcu::Vec4(1.0f, 1.0f, -1.0f, -1.0f);
1463 			break;
1464 
1465 		default:
1466 			DE_ASSERT(false);
1467 	}
1468 
1469 	// Texture coordinates
1470 	m_values.coordMin = tcu::Vec4(0.0f);
1471 	m_values.coordMax = tcu::Vec4(1.0f);
1472 
1473 	if (m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO)
1474 	{
1475 		// No scale or bias used for accuracy.
1476 		m_values.derivScale		= tcu::Vec4(1.0f);
1477 		m_values.derivBias		= tcu::Vec4(0.0f);
1478 	}
1479 	else
1480 	{
1481 		// Compute scale - bias that normalizes to 0..1 range.
1482 		const tcu::UVec2	viewportSize	(VIEWPORT_WIDTH, VIEWPORT_HEIGHT);
1483 		const float			w				= float(viewportSize.x());
1484 		const float			h				= float(viewportSize.y());
1485 		const tcu::Vec4		dx				= (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(w, w, w*0.5f, -w*0.5f);
1486 		const tcu::Vec4		dy				= (m_textureValues.texValueMax - m_textureValues.texValueMin) / tcu::Vec4(h, h, h*0.5f, -h*0.5f);
1487 
1488 		if (isDfdxFunc(m_definitions.func))
1489 			m_values.derivScale = 0.5f / dx;
1490 		else if (isDfdyFunc(m_definitions.func))
1491 			m_values.derivScale = 0.5f / dy;
1492 		else if (isFwidthFunc(m_definitions.func))
1493 			m_values.derivScale = 0.5f / (tcu::abs(dx) + tcu::abs(dy));
1494 		else
1495 			DE_ASSERT(false);
1496 
1497 		m_values.derivBias = tcu::Vec4(0.0f, 0.0f, 0.0f, 0.0f);
1498 	}
1499 }
1500 
~TextureDerivateCase(void)1501 TextureDerivateCase::~TextureDerivateCase (void)
1502 {
1503 }
1504 
createInstance(Context & context) const1505 TestInstance* TextureDerivateCase::createInstance (Context& context) const
1506 {
1507 	DE_ASSERT(m_uniformSetup != DE_NULL);
1508 	return new TextureDerivateCaseInstance(context, *m_uniformSetup, m_definitions, m_values, m_textureValues);
1509 }
1510 
initPrograms(vk::SourceCollections & programCollection) const1511 void TextureDerivateCase::initPrograms (vk::SourceCollections& programCollection) const
1512 {
1513 	// Generate shader
1514 	{
1515 		const char* fragmentTmpl =
1516 			"#version 450\n"
1517 			"layout(location = 0) in highp vec2 v_coord;\n"
1518 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1519 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1520 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1521 			"layout(binding = 2) uniform ${PRECISION} sampler2D u_sampler;\n"
1522 			"void main (void)\n"
1523 			"{\n"
1524 			"	${PRECISION} vec4 tex = texture(u_sampler, v_coord);\n"
1525 			"	${PRECISION} ${DATATYPE} res = ${FUNC}(tex${SWIZZLE}) * u_scale + u_bias;\n"
1526 			"	o_color = ${CAST_TO_OUTPUT};\n"
1527 			"}\n";
1528 
1529 		const bool			packToInt		= m_definitions.surfaceType == SURFACETYPE_FLOAT_FBO;
1530 		map<string, string> fragmentParams;
1531 
1532 		fragmentParams["OUTPUT_TYPE"]		= glu::getDataTypeName(packToInt ? glu::TYPE_UINT_VEC4 : glu::TYPE_FLOAT_VEC4);
1533 		fragmentParams["OUTPUT_PREC"]		= glu::getPrecisionName(packToInt ? glu::PRECISION_HIGHP : m_definitions.precision);
1534 		fragmentParams["PRECISION"]			= glu::getPrecisionName(m_definitions.precision);
1535 		fragmentParams["DATATYPE"]			= glu::getDataTypeName(m_definitions.dataType);
1536 		fragmentParams["FUNC"]				= getDerivateFuncName(m_definitions.func);
1537 		fragmentParams["SWIZZLE"]			= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "" :
1538 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? ".xyz" :
1539 											  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? ".xy" :
1540 											  /* TYPE_FLOAT */								   ".x";
1541 
1542 		if (packToInt)
1543 		{
1544 			fragmentParams["CAST_TO_OUTPUT"]	= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "floatBitsToUint(res)" :
1545 												  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "floatBitsToUint(vec4(res, 1.0))" :
1546 												  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "floatBitsToUint(vec4(res, 0.0, 1.0))" :
1547 												  /* TYPE_FLOAT */								   "floatBitsToUint(vec4(res, 0.0, 0.0, 1.0))";
1548 		}
1549 		else
1550 		{
1551 			fragmentParams["CAST_TO_OUTPUT"]	= m_definitions.dataType == glu::TYPE_FLOAT_VEC4 ? "res" :
1552 												  m_definitions.dataType == glu::TYPE_FLOAT_VEC3 ? "vec4(res, 1.0)" :
1553 												  m_definitions.dataType == glu::TYPE_FLOAT_VEC2 ? "vec4(res, 0.0, 1.0)" :
1554 												  /* TYPE_FLOAT */								   "vec4(res, 0.0, 0.0, 1.0)";
1555 		}
1556 
1557 		std::string fragmentSrc = tcu::StringTemplate(fragmentTmpl).specialize(fragmentParams);
1558 		programCollection.glslSources.add("vert") << glu::VertexSource(genVertexSource(m_definitions.coordDataType, m_definitions.coordPrecision));
1559 		programCollection.glslSources.add("frag") << glu::FragmentSource(fragmentSrc);
1560 	}
1561 }
1562 
1563 // ShaderDerivateTests
1564 
1565 class ShaderDerivateTests : public tcu::TestCaseGroup
1566 {
1567 public:
1568 							ShaderDerivateTests		(tcu::TestContext& testCtx);
1569 	virtual					~ShaderDerivateTests	(void);
1570 
1571 	virtual void			init					(void);
1572 
1573 private:
1574 							ShaderDerivateTests		(const ShaderDerivateTests&);		// not allowed!
1575 	ShaderDerivateTests&	operator=				(const ShaderDerivateTests&);		// not allowed!
1576 };
1577 
ShaderDerivateTests(tcu::TestContext & testCtx)1578 ShaderDerivateTests::ShaderDerivateTests (tcu::TestContext& testCtx)
1579 	: TestCaseGroup(testCtx, "derivate")
1580 {
1581 }
1582 
~ShaderDerivateTests(void)1583 ShaderDerivateTests::~ShaderDerivateTests (void)
1584 {
1585 }
1586 
1587 struct FunctionSpec
1588 {
1589 	std::string		name;
1590 	DerivateFunc	function;
1591 	glu::DataType	dataType;
1592 	glu::Precision	precision;
1593 
FunctionSpecvkt::sr::__anona541e7d70111::FunctionSpec1594 	FunctionSpec (const std::string& name_, DerivateFunc function_, glu::DataType dataType_, glu::Precision precision_)
1595 		: name		(name_)
1596 		, function	(function_)
1597 		, dataType	(dataType_)
1598 		, precision	(precision_)
1599 	{
1600 	}
1601 };
1602 
init(void)1603 void ShaderDerivateTests::init (void)
1604 {
1605 	static const struct
1606 	{
1607 		const char*			name;
1608 		const char*			description;
1609 		const char*			source;
1610 		BaseUniformType		usedDefaultUniform;
1611 		bool				inNonUniformControlFlow;
1612 	} s_linearDerivateCases[] =
1613 	{
1614 		{
1615 			"linear",
1616 			"Basic derivate of linearly interpolated argument",
1617 
1618 			"#version 450\n"
1619 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1620 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1621 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1622 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1623 			"void main (void)\n"
1624 			"{\n"
1625 			"	${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1626 			"	o_color = ${CAST_TO_OUTPUT};\n"
1627 			"}\n",
1628 
1629 			U_LAST,
1630 			false
1631 		},
1632 		{
1633 			"in_function",
1634 			"Derivate of linear function argument",
1635 
1636 			"#version 450\n"
1637 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1638 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1639 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1640 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1641 			"\n"
1642 			"${PRECISION} ${DATATYPE} computeRes (${PRECISION} ${DATATYPE} value)\n"
1643 			"{\n"
1644 			"	return ${FUNC}(v_coord) * u_scale + u_bias;\n"
1645 			"}\n"
1646 			"\n"
1647 			"void main (void)\n"
1648 			"{\n"
1649 			"	${PRECISION} ${DATATYPE} res = computeRes(v_coord);\n"
1650 			"	o_color = ${CAST_TO_OUTPUT};\n"
1651 			"}\n",
1652 
1653 			U_LAST,
1654 			false
1655 		},
1656 		{
1657 			"static_if",
1658 			"Derivate of linearly interpolated value in static if",
1659 
1660 			"#version 450\n"
1661 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1662 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1663 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1664 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1665 			"void main (void)\n"
1666 			"{\n"
1667 			"	${PRECISION} ${DATATYPE} res;\n"
1668 			"	if (false)\n"
1669 			"		res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1670 			"	else\n"
1671 			"		res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1672 			"	o_color = ${CAST_TO_OUTPUT};\n"
1673 			"}\n",
1674 
1675 			U_LAST,
1676 			false
1677 		},
1678 		{
1679 			"static_loop",
1680 			"Derivate of linearly interpolated value in static loop",
1681 
1682 			"#version 450\n"
1683 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1684 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1685 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1686 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1687 			"void main (void)\n"
1688 			"{\n"
1689 			"	${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1690 			"	for (int i = 0; i < 2; i++)\n"
1691 			"		res += ${FUNC}(v_coord * float(i));\n"
1692 			"	res = res * u_scale + u_bias;\n"
1693 			"	o_color = ${CAST_TO_OUTPUT};\n"
1694 			"}\n",
1695 
1696 			U_LAST,
1697 			false
1698 		},
1699 		{
1700 			"static_switch",
1701 			"Derivate of linearly interpolated value in static switch",
1702 
1703 			"#version 450\n"
1704 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1705 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1706 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1707 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1708 			"void main (void)\n"
1709 			"{\n"
1710 			"	${PRECISION} ${DATATYPE} res;\n"
1711 			"	switch (1)\n"
1712 			"	{\n"
1713 			"		case 0:	res = ${FUNC}(-v_coord) * u_scale + u_bias;	break;\n"
1714 			"		case 1:	res = ${FUNC}(v_coord) * u_scale + u_bias;	break;\n"
1715 			"	}\n"
1716 			"	o_color = ${CAST_TO_OUTPUT};\n"
1717 			"}\n",
1718 
1719 			U_LAST,
1720 			false
1721 		},
1722 		{
1723 			"uniform_if",
1724 			"Derivate of linearly interpolated value in uniform if",
1725 
1726 			"#version 450\n"
1727 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1728 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1729 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1730 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1731 			"layout(binding = 2, std140) uniform Ui_true { bool ub_true; };\n"
1732 			"void main (void)\n"
1733 			"{\n"
1734 			"	${PRECISION} ${DATATYPE} res;\n"
1735 			"	if (ub_true)"
1736 			"		res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1737 			"	else\n"
1738 			"		res = ${FUNC}(-v_coord) * u_scale + u_bias;\n"
1739 			"	o_color = ${CAST_TO_OUTPUT};\n"
1740 			"}\n",
1741 
1742 			UB_TRUE,
1743 			false
1744 		},
1745 		{
1746 			"uniform_loop",
1747 			"Derivate of linearly interpolated value in uniform loop",
1748 
1749 			"#version 450\n"
1750 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1751 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1752 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1753 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1754 			"layout(binding = 2, std140) uniform Ui_two { int ui_two; };\n"
1755 			"void main (void)\n"
1756 			"{\n"
1757 			"	${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1758 			"	for (int i = 0; i < ui_two; i++)\n"
1759 			"		res += ${FUNC}(v_coord * float(i));\n"
1760 			"	res = res * u_scale + u_bias;\n"
1761 			"	o_color = ${CAST_TO_OUTPUT};\n"
1762 			"}\n",
1763 
1764 			UI_TWO,
1765 			false
1766 		},
1767 		{
1768 			"uniform_switch",
1769 			"Derivate of linearly interpolated value in uniform switch",
1770 
1771 			"#version 450\n"
1772 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1773 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1774 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1775 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1776 			"layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1777 			"void main (void)\n"
1778 			"{\n"
1779 			"	${PRECISION} ${DATATYPE} res;\n"
1780 			"	switch (ui_one)\n"
1781 			"	{\n"
1782 			"		case 0:	res = ${FUNC}(-v_coord) * u_scale + u_bias;	break;\n"
1783 			"		case 1:	res = ${FUNC}(v_coord) * u_scale + u_bias;	break;\n"
1784 			"	}\n"
1785 			"	o_color = ${CAST_TO_OUTPUT};\n"
1786 			"}\n",
1787 
1788 			UI_ONE,
1789 			false
1790 		},
1791 		{
1792 			"dynamic_if",
1793 			"Derivate of linearly interpolated value in static if",
1794 
1795 			"#version 450\n"
1796 			"#extension GL_KHR_shader_subgroup_ballot : require\n"
1797 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1798 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1799 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1800 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1801 			"layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1802 			"void main (void)\n"
1803 			"{\n"
1804 			"	${PRECISION} ${DATATYPE} res;\n"
1805 			"	bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1806 			"	uvec4 quad_ballot = uvec4(0);\n"
1807 			"	quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1808 			"	bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1809 			"	if (quad_uniform)\n"
1810 			"		res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1811 			"	else\n"
1812 			"		res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias;\n"
1813 			"	o_color = ${CAST_TO_OUTPUT};\n"
1814 			"}\n",
1815 
1816 			UI_ONE,
1817 			true
1818 		},
1819 		{
1820 			"dynamic_loop",
1821 			"Derivate of linearly interpolated value in uniform loop",
1822 
1823 			"#version 450\n"
1824 			"#extension GL_KHR_shader_subgroup_ballot : require\n"
1825 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1826 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1827 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1828 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1829 			"layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1830 			"void main (void)\n"
1831 			"{\n"
1832 			"	${PRECISION} ${DATATYPE} res = ${DATATYPE}(0.0);\n"
1833 			"	bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1834 			"	uvec4 quad_ballot = uvec4(0);\n"
1835 			"	quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1836 			"	bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1837 			"	for (int i = 0; i < ui_one + int(quad_uniform); i++)\n"
1838 			"		res = ${FUNC}(v_coord * float(i - int(quad_uniform) + 1));\n"
1839 			"	res = res * u_scale + u_bias;\n"
1840 			"	o_color = ${CAST_TO_OUTPUT};\n"
1841 			"}\n",
1842 
1843 			UI_ONE,
1844 			true
1845 		},
1846 		{
1847 			"dynamic_switch",
1848 			"Derivate of linearly interpolated value in uniform switch",
1849 
1850 			"#version 450\n"
1851 			"#extension GL_KHR_shader_subgroup_ballot : require\n"
1852 			"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1853 			"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1854 			"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1855 			"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1856 			"layout(binding = 2, std140) uniform Ui_one { int ui_one; };\n"
1857 			"void main (void)\n"
1858 			"{\n"
1859 			"	${PRECISION} ${DATATYPE} res;\n"
1860 			"	bool non_uniform = ((uint(gl_FragCoord.x * 0.4) + uint(gl_FragCoord.y * 0.3)) & 2) != 0;\n"
1861 			"	uvec4 quad_ballot = uvec4(0);\n"
1862 			"	quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1863 			"	bool quad_uniform = (subgroupBallot(non_uniform) & quad_ballot) == quad_ballot;\n"
1864 			"	switch (int(quad_uniform))\n"
1865 			"	{\n"
1866 			"		case 0:	res = ${FUNC}(v_coord) * u_scale + u_bias;	break;\n"
1867 			"		case 1:	res = ${FUNC}(v_coord * float(ui_one)) * u_scale + u_bias;	break;\n"
1868 			"	}\n"
1869 			"	o_color = ${CAST_TO_OUTPUT};\n"
1870 			"}\n",
1871 
1872 			UI_ONE,
1873 			true
1874 		},
1875 	};
1876 
1877 	const char*	dFdxSubgroupSource =
1878 		"#version 450\n"
1879 		"#extension GL_KHR_shader_subgroup_ballot : require\n"
1880 		"#extension GL_KHR_shader_subgroup_quad : require\n"
1881 		"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1882 		"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1883 		"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1884 		"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1885 		"${DATATYPE} dFdxSubgroup(${DATATYPE} f)\n"
1886 		"{\n"
1887 		"	${DATATYPE} left, right;\n"
1888 		"	if ((gl_SubgroupInvocationID & 2) == 0) {\n"
1889 		"		left = subgroupQuadBroadcast(f, 0);\n"
1890 		"		right = subgroupQuadBroadcast(f, 1);\n"
1891 		"	} else {\n"
1892 		"		left = subgroupQuadBroadcast(f, 2);\n"
1893 		"		right = subgroupQuadBroadcast(f, 3);\n"
1894 		"	}\n"
1895 		"	return right - left;\n"
1896 		"}\n"
1897 		"\n"
1898 		"void main (void)\n"
1899 		"{\n"
1900 		"	uvec4 quad_ballot = uvec4(0);\n"
1901 		"	${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1902 		"	o_color = ${CAST_TO_OUTPUT};\n"
1903 		"}\n";
1904 
1905 	const char*	dFdySubgroupSource =
1906 		"#version 450\n"
1907 		"#extension GL_KHR_shader_subgroup_quad : require\n"
1908 		"#extension GL_KHR_shader_subgroup_ballot : require\n"
1909 		"layout(location = 0) in ${PRECISION} ${DATATYPE} v_coord;\n"
1910 		"layout(location = 0) out ${OUTPUT_PREC} ${OUTPUT_TYPE} o_color;\n"
1911 		"layout(binding = 0, std140) uniform Scale { ${PRECISION} ${DATATYPE} u_scale; };\n"
1912 		"layout(binding = 1, std140) uniform Bias { ${PRECISION} ${DATATYPE} u_bias; };\n"
1913 		"${DATATYPE} dFdySubgroup(${DATATYPE} f)\n"
1914 		"{\n"
1915 		"	${DATATYPE} top, bottom;\n"
1916 		"	if ((gl_SubgroupInvocationID & 1) == 0) {\n"
1917 		"		top = subgroupQuadBroadcast(f, 0);\n"
1918 		"		bottom = subgroupQuadBroadcast(f, 2);\n"
1919 		"	} else {\n"
1920 		"		top = subgroupQuadBroadcast(f, 1);\n"
1921 		"		bottom = subgroupQuadBroadcast(f, 3);\n"
1922 		"	}\n"
1923 		"	return bottom - top;\n"
1924 		"}\n"
1925 		"\n"
1926 		"void main (void)\n"
1927 		"{\n"
1928 		"	uvec4 quad_ballot = uvec4(0);\n"
1929 		"	quad_ballot[gl_SubgroupInvocationID >> 5] = 0xf << (gl_SubgroupInvocationID & 0x1c);\n"
1930 		"	${PRECISION} ${DATATYPE} res = ${FUNC}(v_coord) * u_scale + u_bias;\n"
1931 		"	o_color = ${CAST_TO_OUTPUT};\n"
1932 		"}\n";
1933 
1934 	static const struct
1935 	{
1936 		const char*		name;
1937 		SurfaceType		surfaceType;
1938 		int				numSamples;
1939 	} s_fboConfigs[] =
1940 	{
1941 		{ "fbo",			SURFACETYPE_UNORM_FBO,		0 },
1942 		{ "fbo_msaa2",		SURFACETYPE_UNORM_FBO,		2 },
1943 		{ "fbo_msaa4",		SURFACETYPE_UNORM_FBO,		4 },
1944 		{ "fbo_float",		SURFACETYPE_FLOAT_FBO,		0 },
1945 	};
1946 
1947 	static const struct
1948 	{
1949 		const char*		name;
1950 		SurfaceType		surfaceType;
1951 		int				numSamples;
1952 	} s_textureConfigs[] =
1953 	{
1954 		{ "basic",			SURFACETYPE_UNORM_FBO,		0 },
1955 		{ "msaa4",			SURFACETYPE_UNORM_FBO,		4 },
1956 		{ "float",			SURFACETYPE_FLOAT_FBO,		0 },
1957 	};
1958 
1959 	// .dfdx[fine|coarse], .dfdy[fine|coarse], .fwidth[fine|coarse]
1960 	for (int funcNdx = 0; funcNdx < DERIVATE_LAST; funcNdx++)
1961 	{
1962 		const DerivateFunc					function		= DerivateFunc(funcNdx);
1963 		de::MovePtr<tcu::TestCaseGroup>		functionGroup	(new tcu::TestCaseGroup(m_testCtx, getDerivateFuncCaseName(function)));
1964 
1965 		// .constant - no precision variants and no subgroup derivatives, checks that derivate of constant arguments is 0
1966 		if (!isSubgroupFunc(function))
1967 		{
1968 			// Derivate of constant argument
1969 			de::MovePtr<tcu::TestCaseGroup>	constantGroup	(new tcu::TestCaseGroup(m_testCtx, "constant"));
1970 
1971 			for (int vecSize = 1; vecSize <= 4; vecSize++)
1972 			{
1973 				const glu::DataType			dataType		= vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
1974 				constantGroup->addChild(new ConstantDerivateCase(m_testCtx, glu::getDataTypeName(dataType), function, dataType));
1975 			}
1976 
1977 			functionGroup->addChild(constantGroup.release());
1978 		}
1979 
1980 		// Cases based on LinearDerivateCase; subgroup derivatives are handled separately
1981 		if (!isSubgroupFunc(function))
1982 		{
1983 			for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_linearDerivateCases); caseNdx++)
1984 			{
1985 				de::MovePtr<tcu::TestCaseGroup>	linearCaseGroup	(new tcu::TestCaseGroup(m_testCtx, s_linearDerivateCases[caseNdx].name));
1986 				const char*						source			= s_linearDerivateCases[caseNdx].source;
1987 
1988 				for (int vecSize = 1; vecSize <= 4; vecSize++)
1989 				{
1990 					for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
1991 					{
1992 						const glu::DataType		dataType		= vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
1993 						const glu::Precision	precision		= glu::Precision(precNdx);
1994 						const SurfaceType		surfaceType		= SURFACETYPE_UNORM_FBO;
1995 						const int				numSamples		= 0;
1996 						std::ostringstream		caseName;
1997 
1998 						if (caseNdx != 0 && precision == glu::PRECISION_LOWP)
1999 							continue; // Skip as lowp doesn't actually produce any bits when rendered to default FB.
2000 
2001 						caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2002 
2003 						linearCaseGroup->addChild(new LinearDerivateCase(m_testCtx, caseName.str(), function, dataType, precision, s_linearDerivateCases[caseNdx].inNonUniformControlFlow, surfaceType, numSamples, source, s_linearDerivateCases[caseNdx].usedDefaultUniform));
2004 					}
2005 				}
2006 
2007 				functionGroup->addChild(linearCaseGroup.release());
2008 			}
2009 		}
2010 
2011 		// Fbo cases
2012 		for (int caseNdx = 0; caseNdx < DE_LENGTH_OF_ARRAY(s_fboConfigs); caseNdx++)
2013 		{
2014 			de::MovePtr<tcu::TestCaseGroup>	fboGroup		(new tcu::TestCaseGroup(m_testCtx, s_fboConfigs[caseNdx].name, "Derivate usage when rendering into FBO"));
2015 			// use source from subgroup source or source from .linear group
2016 			const char*						source			= function == DERIVATE_DFDXSUBGROUP ? dFdxSubgroupSource :
2017 															  function == DERIVATE_DFDYSUBGROUP ? dFdySubgroupSource :
2018 																								  s_linearDerivateCases[0].source;
2019 			const SurfaceType				surfaceType		= s_fboConfigs[caseNdx].surfaceType;
2020 			const int						numSamples		= s_fboConfigs[caseNdx].numSamples;
2021 
2022 			for (int vecSize = 1; vecSize <= 4; vecSize++)
2023 			{
2024 				for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2025 				{
2026 					const glu::DataType		dataType		= vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2027 					const glu::Precision	precision		= glu::Precision(precNdx);
2028 					std::ostringstream		caseName;
2029 
2030 					if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2031 						continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2032 
2033 					caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2034 
2035 					fboGroup->addChild(new LinearDerivateCase(m_testCtx, caseName.str(), function, dataType, precision, false, surfaceType, numSamples, source, U_LAST));
2036 				}
2037 			}
2038 
2039 			functionGroup->addChild(fboGroup.release());
2040 		}
2041 
2042 		// .texture
2043 		if (!isSubgroupFunc(function))
2044 		{
2045 			de::MovePtr<tcu::TestCaseGroup>		textureGroup	(new tcu::TestCaseGroup(m_testCtx, "texture", "Derivate of texture lookup result"));
2046 
2047 			for (int texCaseNdx = 0; texCaseNdx < DE_LENGTH_OF_ARRAY(s_textureConfigs); texCaseNdx++)
2048 			{
2049 				de::MovePtr<tcu::TestCaseGroup>	caseGroup		(new tcu::TestCaseGroup(m_testCtx, s_textureConfigs[texCaseNdx].name));
2050 				const SurfaceType				surfaceType		= s_textureConfigs[texCaseNdx].surfaceType;
2051 				const int						numSamples		= s_textureConfigs[texCaseNdx].numSamples;
2052 
2053 				for (int vecSize = 1; vecSize <= 4; vecSize++)
2054 				{
2055 					for (int precNdx = 0; precNdx < glu::PRECISION_LAST; precNdx++)
2056 					{
2057 						const glu::DataType		dataType		= vecSize > 1 ? glu::getDataTypeFloatVec(vecSize) : glu::TYPE_FLOAT;
2058 						const glu::Precision	precision		= glu::Precision(precNdx);
2059 						std::ostringstream		caseName;
2060 
2061 						if (surfaceType != SURFACETYPE_FLOAT_FBO && precision == glu::PRECISION_LOWP)
2062 							continue; // Skip as lowp doesn't actually produce any bits when rendered to U8 RT.
2063 
2064 						caseName << glu::getDataTypeName(dataType) << "_" << glu::getPrecisionName(precision);
2065 
2066 						caseGroup->addChild(new TextureDerivateCase(m_testCtx, caseName.str(), function, dataType, precision, surfaceType, numSamples));
2067 					}
2068 				}
2069 
2070 				textureGroup->addChild(caseGroup.release());
2071 			}
2072 
2073 			functionGroup->addChild(textureGroup.release());
2074 		}
2075 
2076 		addChild(functionGroup.release());
2077 	}
2078 }
2079 
2080 } // anonymous
2081 
createDerivateTests(tcu::TestContext & testCtx)2082 tcu::TestCaseGroup* createDerivateTests (tcu::TestContext& testCtx)
2083 {
2084 	return new ShaderDerivateTests(testCtx);
2085 }
2086 
2087 } // sr
2088 } // vkt
2089