• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2019 Valve Corporation.
6  * Copyright (c) 2019 The Khronos Group Inc.
7  *
8  * Licensed under the Apache License, Version 2.0 (the "License");
9  * you may not use this file except in compliance with the License.
10  * You may obtain a copy of the License at
11  *
12  *      http://www.apache.org/licenses/LICENSE-2.0
13  *
14  * Unless required by applicable law or agreed to in writing, software
15  * distributed under the License is distributed on an "AS IS" BASIS,
16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17  * See the License for the specific language governing permissions and
18  * limitations under the License.
19  *
20  *//*!
21  * \file
22  * \brief OpFConvert tests.
23  *//*--------------------------------------------------------------------*/
24 
25 #include "vktShaderFConvertTests.hpp"
26 #include "vktTestCase.hpp"
27 
28 #include "vkBufferWithMemory.hpp"
29 #include "vkObjUtil.hpp"
30 #include "vkBuilderUtil.hpp"
31 #include "vkCmdUtil.hpp"
32 #include "vkPrograms.hpp"
33 
34 #include "deDefs.hpp"
35 #include "deRandom.hpp"
36 
37 #include "tcuFloat.hpp"
38 #include "tcuTestLog.hpp"
39 #include "tcuFormatUtil.hpp"
40 
41 #include <vector>
42 #include <iterator>
43 #include <algorithm>
44 #include <memory>
45 #include <sstream>
46 #include <iomanip>
47 #include <string>
48 #include <limits>
49 #include <cassert>
50 
51 namespace vkt
52 {
53 namespace shaderexecutor
54 {
55 
56 namespace
57 {
58 
59 constexpr deUint32	kRandomSeed								= 0xdeadbeef;
60 constexpr size_t	kRandomSourcesPerType					= 240;
61 constexpr size_t	kMinVectorLength						= 1;
62 constexpr size_t	kMaxVectorLength						= 4;
63 constexpr size_t	kArrayAlignment							= 16;					// Bytes.
64 constexpr size_t	kEffectiveLength[kMaxVectorLength + 1]	= { 0, 1, 2, 4, 4 };	// Effective length of a vector of size i.
65 constexpr size_t	kGCFNumFloats							= 12;					// Greatest Common Factor of the number of floats in a test.
66 
67 // Get a random normal number.
68 // Works for implementations of tcu::Float as T.
69 template <class T>
getRandomNormal(de::Random & rnd)70 T getRandomNormal (de::Random& rnd)
71 {
72 	static constexpr typename T::StorageType	kLeadingMantissaBit	= (static_cast<typename T::StorageType>(1) << T::MANTISSA_BITS);
73 	static constexpr int						kSignValues[]		= { -1, 1 };
74 
75 	int						signBit		= rnd.getInt(0, 1);
76 	int						exponent	= rnd.getInt(1 - T::EXPONENT_BIAS, T::EXPONENT_BIAS + 1);
77 	typename T::StorageType	mantissa	= static_cast<typename T::StorageType>(rnd.getUint64() & static_cast<deUint64>(kLeadingMantissaBit - 1));
78 
79 	// Construct number.
80 	return T::construct(kSignValues[signBit], exponent, (kLeadingMantissaBit | mantissa));
81 }
82 
// Hand-picked samples for tcu::Float class T: both zeros, plus the largest and the smallest normal value of each sign.
// The list is built the first time the function is called and the same list is returned afterwards.
template <class T>
const std::vector<T>& interestingSamples ()
{
	static const std::vector<T> kSamples = []
	{
		std::vector<T> list;
		list.reserve(6u);

		list.push_back(T::zero(-1));
		list.push_back(T::zero( 1));

		// Infinities and NaN are disabled for now:
		//list.push_back(T::inf(-1));
		//list.push_back(T::inf( 1));
		//list.push_back(T::nan());

		list.push_back(T::largestNormal(-1));
		list.push_back(T::largestNormal( 1));
		list.push_back(T::smallestNormal(-1));
		list.push_back(T::smallestNormal( 1));

		return list;
	}();

	return kSamples;
}
102 
103 // Get some random interesting numbers.
104 // Works for implementations of tcu::Float as T.
105 template <class T>
getRandomInteresting(de::Random & rnd,size_t numSamples)106 std::vector<T> getRandomInteresting (de::Random& rnd, size_t numSamples)
107 {
108 	auto&			samples = interestingSamples<T>();
109 	std::vector<T>	result;
110 
111 	result.reserve(numSamples);
112 	std::generate_n(std::back_inserter(result), numSamples, [&rnd, &samples]() { return rnd.choose<T>(begin(samples), end(samples)); });
113 
114 	return result;
115 }
116 
117 // Helper class to build each vector only once in a thread-safe way.
118 template <class T>
119 struct StaticVectorHelper
120 {
121 	std::vector<T> v;
122 
StaticVectorHelpervkt::shaderexecutor::__anon57ac2e260111::StaticVectorHelper123 	StaticVectorHelper (de::Random& rnd)
124 	{
125 		v.reserve(kRandomSourcesPerType);
126 		for (size_t i = 0; i < kRandomSourcesPerType; ++i)
127 			v.push_back(getRandomNormal<T>(rnd));
128 	}
129 };
130 
131 // Get a list of random normal input values for type T.
132 template <class T>
getRandomNormals(de::Random & rnd)133 const std::vector<T>& getRandomNormals (de::Random& rnd)
134 {
135 	static StaticVectorHelper<T> helper(rnd);
136 	return helper.v;
137 }
138 
// Convert every element of a vector of tcu::Float type T1 to tcu::Float type T2, keeping the element order.
template <class T1, class T2>
std::vector<T2> convertVector (const std::vector<T1>& orig)
{
	std::vector<T2> converted;
	converted.reserve(orig.size());

	for (const T1& value : orig)
		converted.push_back(T2::convert(value));

	return converted;
}
151 
152 // Get converted normal values for other tcu::Float types smaller than T, which should be exact conversions when converting back to
153 // those types.
154 template <class T>
155 std::vector<T> getOtherNormals (de::Random& rnd);
156 
157 template<>
getOtherNormals(de::Random &)158 std::vector<tcu::Float16> getOtherNormals<tcu::Float16> (de::Random&)
159 {
160 	// Nothing below tcu::Float16.
161 	return std::vector<tcu::Float16>();
162 }
163 
164 template<>
getOtherNormals(de::Random & rnd)165 std::vector<tcu::Float32> getOtherNormals<tcu::Float32> (de::Random& rnd)
166 {
167 	// The ones from tcu::Float16.
168 	return convertVector<tcu::Float16, tcu::Float32>(getRandomNormals<tcu::Float16>(rnd));
169 }
170 
171 template<>
getOtherNormals(de::Random & rnd)172 std::vector<tcu::Float64> getOtherNormals<tcu::Float64> (de::Random& rnd)
173 {
174 	// The ones from both tcu::Float16 and tcu::Float64.
175 	auto v1 = convertVector<tcu::Float16, tcu::Float64>(getRandomNormals<tcu::Float16>(rnd));
176 	auto v2 = convertVector<tcu::Float32, tcu::Float64>(getRandomNormals<tcu::Float32>(rnd));
177 
178 	v1.reserve(v1.size() + v2.size());
179 	std::copy(begin(v2), end(v2), std::back_inserter(v1));
180 	return v1;
181 }
182 
183 // Get the full list of input values for type T.
184 template <class T>
getInputValues(de::Random & rnd)185 std::vector<T> getInputValues (de::Random& rnd)
186 {
187 	auto&	interesting		= interestingSamples<T>();
188 	auto&	normals			= getRandomNormals<T>(rnd);
189 	auto	otherNormals	= getOtherNormals<T>(rnd);
190 
191 	const size_t numValues		= interesting.size() + normals.size() + otherNormals.size();
192 	const size_t extraValues	= numValues % kGCFNumFloats;
193 	const size_t needed			= ((extraValues == 0) ? 0 : (kGCFNumFloats - extraValues));
194 
195 	auto extra = getRandomInteresting<T> (rnd, needed);
196 
197 	std::vector<T> values;
198 	values.reserve(interesting.size() + normals.size() + otherNormals.size() + extra.size());
199 
200 	std::copy(begin(interesting),	end(interesting),	std::back_inserter(values));
201 	std::copy(begin(normals),		end(normals),		std::back_inserter(values));
202 	std::copy(begin(otherNormals),	end(otherNormals),	std::back_inserter(values));
203 	std::copy(begin(extra),			end(extra),			std::back_inserter(values));
204 
205 	// Shuffle samples around a bit to make it more interesting.
206 	rnd.shuffle(begin(values), end(values));
207 
208 	return values;
209 }
210 
211 // This singleton makes sure generated samples are stable no matter the test order.
212 class InputGenerator
213 {
214 public:
getInstance()215 	static const InputGenerator& getInstance ()
216 	{
217 		static InputGenerator instance;
218 		return instance;
219 	}
220 
getInputValues16() const221 	const std::vector<tcu::Float16>& getInputValues16 () const
222 	{
223 		return m_values16;
224 	}
225 
getInputValues32() const226 	const std::vector<tcu::Float32>& getInputValues32 () const
227 	{
228 		return m_values32;
229 	}
230 
getInputValues64() const231 	const std::vector<tcu::Float64>& getInputValues64 () const
232 	{
233 		return m_values64;
234 	}
235 
236 private:
InputGenerator()237 	InputGenerator ()
238 		: m_rnd(kRandomSeed)
239 		, m_values16(getInputValues<tcu::Float16>(m_rnd))
240 		, m_values32(getInputValues<tcu::Float32>(m_rnd))
241 		, m_values64(getInputValues<tcu::Float64>(m_rnd))
242 	{
243 	}
244 
245 	// Cannot copy or assign.
246 	InputGenerator(const InputGenerator&)				= delete;
247 	InputGenerator& operator=(const InputGenerator&)	= delete;
248 
249 	de::Random					m_rnd;
250 	std::vector<tcu::Float16>	m_values16;
251 	std::vector<tcu::Float32>	m_values32;
252 	std::vector<tcu::Float64>	m_values64;
253 };
254 
255 // Check single result is as expected.
256 // Works for implementations of tcu::Float as T1 and T2.
257 template <class T1, class T2>
validConversion(const T1 & orig,const T2 & result)258 bool validConversion (const T1& orig, const T2& result)
259 {
260 	const T2	acceptedResults[]	= { T2::convert(orig, tcu::ROUND_DOWNWARD), T2::convert(orig, tcu::ROUND_UPWARD) };
261 	bool		valid				= false;
262 
263 	for (const auto& validResult : acceptedResults)
264 	{
265 		if (validResult.isNaN() && result.isNaN())
266 			valid = true;
267 		else if (validResult.isInf() && result.isInf())
268 			valid = true;
269 		else if (validResult.isZero() && result.isZero())
270 			valid = true;
271 		else if (validResult.isDenorm() && (result.isDenorm() || result.isZero()))
272 			valid = true;
273 		else if (validResult.bits() == result.bits()) // Exact conversion, up or down.
274 			valid = true;
275 	}
276 
277 	return valid;
278 }
279 
280 // Check results vector is as expected.
281 template <class T1, class T2>
validConversion(const std::vector<T1> & orig,const std::vector<T2> & converted,tcu::TestLog & log)282 bool validConversion (const std::vector<T1>& orig, const std::vector<T2>& converted, tcu::TestLog& log)
283 {
284 	DE_ASSERT(orig.size() == converted.size());
285 
286 	bool allValid = true;
287 
288 	for (size_t i = 0; i < orig.size(); ++i)
289 	{
290 		const bool valid = validConversion(orig[i], converted[i]);
291 
292 		{
293 			const double origD = orig[i].asDouble();
294 			const double convD = converted[i].asDouble();
295 
296 			std::ostringstream msg;
297 			msg << "[" << i << "] "
298 				<< std::setprecision(std::numeric_limits<double>::digits10 + 2) << std::scientific
299 				<< origD << " converted to " << convD << ": " << (valid ? "OK" : "FAILURE");
300 
301 			log << tcu::TestLog::Message << msg.str() << tcu::TestLog::EndMessage;
302 		}
303 
304 		if (!valid)
305 			allValid = false;
306 	}
307 
308 	return allValid;
309 }
310 
311 // Helps calculate buffer sizes and other parameters for the given number of values and vector length using a given floating point
312 // type. This is mostly used in packFloats() below, but we also need this information in the iterate() method for the test instance,
313 // so it has been separated.
314 struct BufferSizeInfo
315 {
316 	template <class T>
calculatevkt::shaderexecutor::__anon57ac2e260111::BufferSizeInfo317 	static BufferSizeInfo calculate (size_t numValues_, size_t vectorLength_)
318 	{
319 		// The vector length must be a known number.
320 		DE_ASSERT(vectorLength_ >= kMinVectorLength && vectorLength_ <= kMaxVectorLength);
321 		// The number of values must be appropriate for the vector length.
322 		DE_ASSERT(numValues_ % vectorLength_ == 0);
323 
324 		BufferSizeInfo info;
325 
326 		info.numValues		= numValues_;
327 		info.vectorLength	= vectorLength_;
328 		info.totalVectors	= numValues_ / vectorLength_;
329 
330 		const size_t elementSize		= sizeof(typename T::StorageType);
331 		const size_t effectiveLength	= kEffectiveLength[vectorLength_];
332 		const size_t vectorSize			= elementSize * effectiveLength;
333 		const size_t extraBytes			= vectorSize % kArrayAlignment;
334 
335 		info.vectorStrideBytes	= vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
336 		info.memorySizeBytes	= info.vectorStrideBytes * info.totalVectors;
337 
338 		return info;
339 	}
340 
341 	size_t numValues;
342 	size_t vectorLength;
343 	size_t totalVectors;
344 	size_t vectorStrideBytes;
345 	size_t memorySizeBytes;
346 };
347 
348 // Pack an array of tcu::Float values into a buffer to be read from a shader, as if it was an array of vectors with each vector
349 // having size vectorLength (e.g. 3 for a vec3). Note: assumes std140.
350 template <class T>
packFloats(const std::vector<T> & values,size_t vectorLength)351 std::vector<deUint8> packFloats (const std::vector<T>& values, size_t vectorLength)
352 {
353 	BufferSizeInfo sizeInfo = BufferSizeInfo::calculate<T>(values.size(), vectorLength);
354 
355 	std::vector<deUint8> memory(sizeInfo.memorySizeBytes);
356 	for (size_t i = 0; i < sizeInfo.totalVectors; ++i)
357 	{
358 		T* vectorPtr = reinterpret_cast<T*>(memory.data() + sizeInfo.vectorStrideBytes * i);
359 		for (size_t j = 0; j < vectorLength; ++j)
360 			vectorPtr[j] = values[i*vectorLength + j];
361 	}
362 
363 	return memory;
364 }
365 
366 // Unpack an array of vectors into an array of values, undoing what packFloats would do.
367 // expectedNumValues is used for verification.
368 template <class T>
unpackFloats(const std::vector<deUint8> & memory,size_t vectorLength,size_t expectedNumValues)369 std::vector<T> unpackFloats (const std::vector<deUint8>& memory, size_t vectorLength, size_t expectedNumValues)
370 {
371 	DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
372 
373 	const size_t effectiveLength	= kEffectiveLength[vectorLength];
374 	const size_t elementSize		= sizeof(typename T::StorageType);
375 	const size_t vectorSize			= elementSize * effectiveLength;
376 	const size_t extraBytes			= vectorSize % kArrayAlignment;
377 	const size_t vectorBlockSize	= vectorSize + ((extraBytes == 0) ? 0 : (kArrayAlignment - extraBytes));
378 
379 	DE_ASSERT(memory.size() % vectorBlockSize == 0);
380 	const size_t numStoredVectors	= memory.size() / vectorBlockSize;
381 	const size_t numStoredValues	= numStoredVectors * vectorLength;
382 
383 	DE_UNREF(expectedNumValues); // For release builds.
384 	DE_ASSERT(numStoredValues == expectedNumValues);
385 	std::vector<T> values;
386 	values.reserve(numStoredValues);
387 
388 	for (size_t i = 0; i < numStoredVectors; ++i)
389 	{
390 		const T* vectorPtr = reinterpret_cast<const T*>(memory.data() + vectorBlockSize * i);
391 		for (size_t j = 0; j < vectorLength; ++j)
392 			values.push_back(vectorPtr[j]);
393 	}
394 
395 	return values;
396 }
397 
398 enum FloatType
399 {
400 	FLOAT_TYPE_16_BITS = 0,
401 	FLOAT_TYPE_32_BITS,
402 	FLOAT_TYPE_64_BITS,
403 	FLOAT_TYPE_MAX_ENUM,
404 };
405 
406 static const char* const kFloatNames[FLOAT_TYPE_MAX_ENUM] =
407 {
408 	"f16",
409 	"f32",
410 	"f64",
411 };
412 
413 static const char* const kGLSLTypes[][kMaxVectorLength + 1] =
414 {
415 	{ nullptr, "float16_t",	"f16vec2",	"f16vec3",	"f16vec4"	},
416 	{ nullptr, "float",		"vec2",		"vec3",		"vec4"		},
417 	{ nullptr, "double",	"dvec2",	"dvec3",	"dvec4"		},
418 };
419 
420 struct TestParams
421 {
422 	FloatType	from;
423 	FloatType	to;
424 	size_t		vectorLength;
425 
getInputTypeStrvkt::shaderexecutor::__anon57ac2e260111::TestParams426 	std::string	getInputTypeStr		() const
427 	{
428 		DE_ASSERT(from >= 0 && from < FLOAT_TYPE_MAX_ENUM);
429 		DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
430 		return kGLSLTypes[from][vectorLength];
431 	}
432 
getOutputTypeStrvkt::shaderexecutor::__anon57ac2e260111::TestParams433 	std::string getOutputTypeStr	() const
434 	{
435 		DE_ASSERT(to >= 0 && to < FLOAT_TYPE_MAX_ENUM);
436 		DE_ASSERT(vectorLength >= kMinVectorLength && vectorLength <= kMaxVectorLength);
437 		return kGLSLTypes[to][vectorLength];
438 	}
439 };
440 
441 class FConvertTestInstance : public TestInstance
442 {
443 public:
FConvertTestInstance(Context & context,const TestParams & params)444 							FConvertTestInstance	(Context& context, const TestParams& params)
445 								: TestInstance(context)
446 								, m_params(params)
447 								{}
448 
449 	virtual tcu::TestStatus	iterate					(void);
450 
451 private:
452 	TestParams	m_params;
453 };
454 
455 class FConvertTestCase : public TestCase
456 {
457 public:
FConvertTestCase(tcu::TestContext & context,const std::string & name,const std::string & desc,const TestParams & params)458 								FConvertTestCase	(tcu::TestContext& context, const std::string& name, const std::string& desc, const TestParams& params)
459 									: TestCase	(context, name, desc)
460 									, m_params	(params)
461 									{}
462 
~FConvertTestCase(void)463 								~FConvertTestCase	(void) {}
createInstance(Context & context) const464 	virtual TestInstance*		createInstance		(Context& context) const { return new FConvertTestInstance(context, m_params); }
465 	virtual	void				initPrograms		(vk::SourceCollections& programCollection) const;
466 	virtual void				checkSupport		(Context& context) const;
467 
468 private:
469 	TestParams	m_params;
470 };
471 
initPrograms(vk::SourceCollections & programCollection) const472 void FConvertTestCase::initPrograms (vk::SourceCollections& programCollection) const
473 {
474 	const std::string		inputType		= m_params.getInputTypeStr();
475 	const std::string		outputType		= m_params.getOutputTypeStr();
476 	const InputGenerator&	inputGenerator	= InputGenerator::getInstance();
477 
478 	size_t numValues = 0;
479 	switch (m_params.from)
480 	{
481 	case FLOAT_TYPE_16_BITS:
482 		numValues = inputGenerator.getInputValues16().size();
483 		break;
484 	case FLOAT_TYPE_32_BITS:
485 		numValues = inputGenerator.getInputValues32().size();
486 		break;
487 	case FLOAT_TYPE_64_BITS:
488 		numValues = inputGenerator.getInputValues64().size();
489 		break;
490 	default:
491 		DE_ASSERT(false);
492 		break;
493 	}
494 
495 	const size_t arraySize = numValues / m_params.vectorLength;
496 
497 	std::ostringstream shader;
498 
499 	shader
500 		<< "#version 450 core\n"
501 		<< ((m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS) ?
502 			"#extension GL_EXT_shader_16bit_storage: require\n"					// This is needed to use 16-bit float types in buffers.
503 			"#extension GL_EXT_shader_explicit_arithmetic_types: require\n"		// This is needed for some conversions.
504 			: "")
505 		<< "layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
506 		<< "layout(set = 0, binding = 0, std140) buffer issbodef { " << inputType << " val[" << arraySize << "]; } issbo;\n"
507 		<< "layout(set = 0, binding = 1, std140) buffer ossbodef { " << outputType << " val[" << arraySize << "]; } ossbo;\n"
508 		<< "void main()\n"
509 		<< "{\n"
510 		<< "	ossbo.val[gl_WorkGroupID.x] = " << outputType << "(issbo.val[gl_WorkGroupID.x]);\n"
511 		<< "}\n";
512 
513 	programCollection.glslSources.add("comp") << glu::ComputeSource(shader.str());
514 }
515 
checkSupport(Context & context) const516 void FConvertTestCase::checkSupport (Context& context) const
517 {
518 	if (m_params.from == FLOAT_TYPE_64_BITS || m_params.to == FLOAT_TYPE_64_BITS)
519 	{
520 		// Check for 64-bit float support.
521 		auto features = context.getDeviceFeatures();
522 		if (!features.shaderFloat64)
523 			TCU_THROW(NotSupportedError, "64-bit floats not supported in shader code");
524 	}
525 
526 	if (m_params.from == FLOAT_TYPE_16_BITS || m_params.to == FLOAT_TYPE_16_BITS)
527 	{
528 		// Check for 16-bit float support.
529 		auto& features16 = context.getShaderFloat16Int8Features();
530 		if (!features16.shaderFloat16)
531 			TCU_THROW(NotSupportedError, "16-bit floats not supported in shader code");
532 
533 		auto& storage16 = context.get16BitStorageFeatures();
534 		if (!storage16.storageBuffer16BitAccess)
535 			TCU_THROW(NotSupportedError, "16-bit floats not supported for storage buffers");
536 	}
537 }
538 
iterate(void)539 tcu::TestStatus FConvertTestInstance::iterate (void)
540 {
541 	BufferSizeInfo			inputBufferSizeInfo;
542 	BufferSizeInfo			outputBufferSizeInfo;
543 	std::vector<deUint8>	inputMemory;
544 
545 	assert(m_params.from == FLOAT_TYPE_16_BITS || m_params.from == FLOAT_TYPE_32_BITS || m_params.from == FLOAT_TYPE_64_BITS);
546 
547 	if (m_params.from == FLOAT_TYPE_16_BITS)
548 	{
549 		auto& inputValues = InputGenerator::getInstance().getInputValues16();
550 		inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputValues.size(), m_params.vectorLength);
551 		inputMemory = packFloats(inputValues, m_params.vectorLength);
552 	}
553 	else if (m_params.from == FLOAT_TYPE_32_BITS)
554 	{
555 		auto& inputValues = InputGenerator::getInstance().getInputValues32();
556 		inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputValues.size(), m_params.vectorLength);
557 		inputMemory = packFloats(inputValues, m_params.vectorLength);
558 	}
559 	else
560 	{
561 		auto& inputValues = InputGenerator::getInstance().getInputValues64();
562 		inputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputValues.size(), m_params.vectorLength);
563 		inputMemory = packFloats(inputValues, m_params.vectorLength);
564 	}
565 
566 	switch (m_params.to)
567 	{
568 	case FLOAT_TYPE_16_BITS:
569 		outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float16>(inputBufferSizeInfo.numValues, m_params.vectorLength);
570 		break;
571 	case FLOAT_TYPE_32_BITS:
572 		outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float32>(inputBufferSizeInfo.numValues, m_params.vectorLength);
573 		break;
574 	case FLOAT_TYPE_64_BITS:
575 		outputBufferSizeInfo = BufferSizeInfo::calculate<tcu::Float64>(inputBufferSizeInfo.numValues, m_params.vectorLength);
576 		break;
577 	default:
578 		assert(false);
579 		break;
580 	}
581 
582 	// Prepare input and output buffers.
583 	auto&	vkd			= m_context.getDeviceInterface();
584 	auto	device		= m_context.getDevice();
585 	auto&	allocator	= m_context.getDefaultAllocator();
586 
587 	de::MovePtr<vk::BufferWithMemory> inputBuffer(
588 		new vk::BufferWithMemory(vkd, device, allocator,
589 								 vk::makeBufferCreateInfo(inputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
590 								 vk::MemoryRequirement::HostVisible)
591 	);
592 
593 	de::MovePtr<vk::BufferWithMemory> outputBuffer(
594 		new vk::BufferWithMemory(vkd, device, allocator,
595 								 vk::makeBufferCreateInfo(outputBufferSizeInfo.memorySizeBytes, vk::VK_BUFFER_USAGE_STORAGE_BUFFER_BIT),
596 								 vk::MemoryRequirement::HostVisible)
597 	);
598 
599 	// Copy values to input buffer.
600 	{
601 		auto& alloc = inputBuffer->getAllocation();
602 		deMemcpy(reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), inputMemory.data(), inputMemory.size());
603 		vk::flushAlloc(vkd, device, alloc);
604 	}
605 
606 	// Create an array with the input and output buffers to make it easier to iterate below.
607 	const vk::VkBuffer buffers[] = { inputBuffer->get(), outputBuffer->get() };
608 
609 	// Create descriptor set layout.
610 	std::vector<vk::VkDescriptorSetLayoutBinding> bindings;
611 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
612 	{
613 		const vk::VkDescriptorSetLayoutBinding binding =
614 		{
615 			static_cast<deUint32>(i),								// uint32_t              binding;
616 			vk::VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,					// VkDescriptorType      descriptorType;
617 			1u,														// uint32_t              descriptorCount;
618 			vk::VK_SHADER_STAGE_COMPUTE_BIT,						// VkShaderStageFlags    stageFlags;
619 			DE_NULL,													// const VkSampler*      pImmutableSamplers;
620 		};
621 		bindings.push_back(binding);
622 	}
623 
624 	const vk::VkDescriptorSetLayoutCreateInfo layoutCreateInfo =
625 	{
626 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,	// VkStructureType                        sType;
627 		DE_NULL,													// const void*                            pNext;
628 		0,															// VkDescriptorSetLayoutCreateFlags       flags;
629 		static_cast<deUint32>(bindings.size()),						// uint32_t                               bindingCount;
630 		bindings.data()												// const VkDescriptorSetLayoutBinding*    pBindings;
631 	};
632 	auto descriptorSetLayout = vk::createDescriptorSetLayout(vkd, device, &layoutCreateInfo);
633 
634 	// Create descriptor set.
635 	vk::DescriptorPoolBuilder poolBuilder;
636 	for (const auto& b : bindings)
637 		poolBuilder.addType(b.descriptorType, 1u);
638 	auto descriptorPool = poolBuilder.build(vkd, device, vk::VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
639 
640 	const vk::VkDescriptorSetAllocateInfo allocateInfo =
641 	{
642 		vk::VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,	// VkStructureType                 sType;
643 		DE_NULL,											// const void*                     pNext;
644 		*descriptorPool,									// VkDescriptorPool                descriptorPool;
645 		1u,													// uint32_t                        descriptorSetCount;
646 		&descriptorSetLayout.get()							// const VkDescriptorSetLayout*    pSetLayouts;
647 	};
648 	auto descriptorSet = vk::allocateDescriptorSet(vkd, device, &allocateInfo);
649 
650 	// Update descriptor set.
651 	std::vector<vk::VkDescriptorBufferInfo>	descriptorBufferInfos;
652 	std::vector<vk::VkWriteDescriptorSet>	descriptorWrites;
653 
654 	for (const auto& buffer : buffers)
655 	{
656 		const vk::VkDescriptorBufferInfo bufferInfo =
657 		{
658 			buffer,			// VkBuffer        buffer;
659 			0u,				// VkDeviceSize    offset;
660 			VK_WHOLE_SIZE,	// VkDeviceSize    range;
661 		};
662 		descriptorBufferInfos.push_back(bufferInfo);
663 	}
664 
665 	for (size_t i = 0; i < bindings.size(); ++i)
666 	{
667 		const vk::VkWriteDescriptorSet write =
668 		{
669 			vk::VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,	// VkStructureType                  sType;
670 			DE_NULL,									// const void*                      pNext;
671 			*descriptorSet,								// VkDescriptorSet                  dstSet;
672 			static_cast<deUint32>(i),					// uint32_t                         dstBinding;
673 			0u,											// uint32_t                         dstArrayElement;
674 			1u,											// uint32_t                         descriptorCount;
675 			bindings[i].descriptorType,					// VkDescriptorType                 descriptorType;
676 			DE_NULL,									// const VkDescriptorImageInfo*     pImageInfo;
677 			&descriptorBufferInfos[i],					// const VkDescriptorBufferInfo*    pBufferInfo;
678 			DE_NULL,									// const VkBufferView*              pTexelBufferView;
679 		};
680 		descriptorWrites.push_back(write);
681 	}
682 	vkd.updateDescriptorSets(device, static_cast<deUint32>(descriptorWrites.size()), descriptorWrites.data(), 0u, DE_NULL);
683 
684 	// Prepare barriers in advance so data is visible to the shaders and the host.
685 	std::vector<vk::VkBufferMemoryBarrier> hostToDevBarriers;
686 	std::vector<vk::VkBufferMemoryBarrier> devToHostBarriers;
687 	for (int i = 0; i < DE_LENGTH_OF_ARRAY(buffers); ++i)
688 	{
689 		const vk::VkBufferMemoryBarrier hostToDev =
690 		{
691 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
692 			DE_NULL,															// const void*		pNext;
693 			vk::VK_ACCESS_HOST_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
694 			(vk::VK_ACCESS_SHADER_READ_BIT | vk::VK_ACCESS_SHADER_WRITE_BIT),	// VkAccessFlags	dstAccessMask;
695 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
696 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
697 			buffers[i],															// VkBuffer			buffer;
698 			0u,																	// VkDeviceSize		offset;
699 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
700 		};
701 		hostToDevBarriers.push_back(hostToDev);
702 
703 		const vk::VkBufferMemoryBarrier devToHost =
704 		{
705 			vk::VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,						// VkStructureType	sType;
706 			DE_NULL,															// const void*		pNext;
707 			vk::VK_ACCESS_SHADER_WRITE_BIT,										// VkAccessFlags	srcAccessMask;
708 			vk::VK_ACCESS_HOST_READ_BIT,										// VkAccessFlags	dstAccessMask;
709 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			srcQueueFamilyIndex;
710 			VK_QUEUE_FAMILY_IGNORED,											// deUint32			dstQueueFamilyIndex;
711 			buffers[i],															// VkBuffer			buffer;
712 			0u,																	// VkDeviceSize		offset;
713 			VK_WHOLE_SIZE,														// VkDeviceSize		size;
714 		};
715 		devToHostBarriers.push_back(devToHost);
716 	}
717 
718 	// Create command pool and command buffer.
719 	auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
720 
721 	const vk::VkCommandPoolCreateInfo cmdPoolCreateInfo =
722 	{
723 		vk::VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO,	// VkStructureType				sType;
724 		DE_NULL,										// const void*					pNext;
725 		vk::VK_COMMAND_POOL_CREATE_TRANSIENT_BIT,		// VkCommandPoolCreateFlags		flags;
726 		queueFamilyIndex,								// deUint32						queueFamilyIndex;
727 	};
728 	auto cmdPool = vk::createCommandPool(vkd, device, &cmdPoolCreateInfo);
729 
730 	const vk::VkCommandBufferAllocateInfo cmdBufferAllocateInfo =
731 	{
732 		vk::VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType			sType;
733 		DE_NULL,											// const void*				pNext;
734 		*cmdPool,											// VkCommandPool			commandPool;
735 		vk::VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel		level;
736 		1u,													// deUint32					commandBufferCount;
737 	};
738 	auto cmdBuffer = vk::allocateCommandBuffer(vkd, device, &cmdBufferAllocateInfo);
739 
740 	// Create pipeline layout.
741 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo =
742 	{
743 		vk::VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType					sType;
744 		DE_NULL,											// const void*						pNext;
745 		0,													// VkPipelineLayoutCreateFlags		flags;
746 		1u,													// deUint32							setLayoutCount;
747 		&descriptorSetLayout.get(),							// const VkDescriptorSetLayout*		pSetLayouts;
748 		0u,													// deUint32							pushConstantRangeCount;
749 		DE_NULL,											// const VkPushConstantRange*		pPushConstantRanges;
750 	};
751 	auto pipelineLayout = vk::createPipelineLayout(vkd, device, &pipelineLayoutCreateInfo);
752 
753 	// Create compute pipeline.
754 	const vk::Unique<vk::VkShaderModule> shader(vk::createShaderModule(vkd, device, m_context.getBinaryCollection().get("comp"), 0));
755 
756 	const vk::VkComputePipelineCreateInfo computeCreateInfo =
757 	{
758 		vk::VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType                    sType;
759 		DE_NULL,											// const void*                        pNext;
760 		0,													// VkPipelineCreateFlags              flags;
761 		{													// VkPipelineShaderStageCreateInfo    stage;
762 			vk::VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType                     sType;
763 			DE_NULL,													// const void*                         pNext;
764 			0,															// VkPipelineShaderStageCreateFlags    flags;
765 			vk::VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits               stage;
766 			*shader,													// VkShaderModule                      module;
767 			"main",														// const char*                         pName;
768 			DE_NULL,													// const VkSpecializationInfo*         pSpecializationInfo;
769 		},
770 		*pipelineLayout,									// VkPipelineLayout                   layout;
771 		DE_NULL,											// VkPipeline                         basePipelineHandle;
772 		0,													// int32_t                            basePipelineIndex;
773 	};
774 	auto computePipeline = vk::createComputePipeline(vkd, device, DE_NULL, &computeCreateInfo);
775 
776 	// Run the shader.
777 	vk::beginCommandBuffer(vkd, *cmdBuffer);
778 		vkd.cmdBindPipeline(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
779 		vkd.cmdBindDescriptorSets(*cmdBuffer, vk::VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0, 1u, &descriptorSet.get(), 0u, DE_NULL);
780 		vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_HOST_BIT, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(hostToDevBarriers.size()), hostToDevBarriers.data(), 0u, DE_NULL);
781 		vkd.cmdDispatch(*cmdBuffer, static_cast<deUint32>(inputBufferSizeInfo.totalVectors), 1u, 1u);
782 		vkd.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, 0, 0u, DE_NULL, static_cast<deUint32>(devToHostBarriers.size()), devToHostBarriers.data(), 0u, DE_NULL);
783 	vk::endCommandBuffer(vkd, *cmdBuffer);
784 	vk::submitCommandsAndWait(vkd, device, m_context.getUniversalQueue(), *cmdBuffer);
785 
786 	// Invalidate output allocation.
787 	vk::invalidateAlloc(vkd, device, outputBuffer->getAllocation());
788 
789 	// Copy output buffer data.
790 	std::vector<deUint8> outputMemory(outputBufferSizeInfo.memorySizeBytes);
791 	{
792 		auto& alloc = outputBuffer->getAllocation();
793 		deMemcpy(outputMemory.data(), reinterpret_cast<deUint8*>(alloc.getHostPtr()) + alloc.getOffset(), outputBufferSizeInfo.memorySizeBytes);
794 	}
795 
796 	// Unpack and verify output data.
797 	auto& testLog = m_context.getTestContext().getLog();
798 	bool conversionOk = false;
799 	switch (m_params.to)
800 	{
801 	case FLOAT_TYPE_16_BITS:
802 		{
803 			auto outputValues = unpackFloats<tcu::Float16>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
804 			switch (m_params.from)
805 			{
806 			case FLOAT_TYPE_32_BITS:
807 				{
808 					auto& inputValues = InputGenerator::getInstance().getInputValues32();
809 					conversionOk = validConversion(inputValues, outputValues, testLog);
810 				}
811 				break;
812 
813 			case FLOAT_TYPE_64_BITS:
814 				{
815 					auto& inputValues = InputGenerator::getInstance().getInputValues64();
816 					conversionOk = validConversion(inputValues, outputValues, testLog);
817 				}
818 				break;
819 
820 			default:
821 				DE_ASSERT(false);
822 				break;
823 			}
824 		}
825 		break;
826 
827 	case FLOAT_TYPE_32_BITS:
828 		{
829 			auto outputValues = unpackFloats<tcu::Float32>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
830 			switch (m_params.from)
831 			{
832 			case FLOAT_TYPE_16_BITS:
833 				{
834 					auto& inputValues = InputGenerator::getInstance().getInputValues16();
835 					conversionOk = validConversion(inputValues, outputValues, testLog);
836 				}
837 				break;
838 
839 			case FLOAT_TYPE_64_BITS:
840 				{
841 					auto& inputValues = InputGenerator::getInstance().getInputValues64();
842 					conversionOk = validConversion(inputValues, outputValues, testLog);
843 				}
844 				break;
845 
846 			default:
847 				DE_ASSERT(false);
848 				break;
849 			}
850 		}
851 		break;
852 
853 	case FLOAT_TYPE_64_BITS:
854 		{
855 			auto outputValues = unpackFloats<tcu::Float64>(outputMemory, m_params.vectorLength, inputBufferSizeInfo.numValues);
856 			switch (m_params.from)
857 			{
858 			case FLOAT_TYPE_16_BITS:
859 				{
860 					auto& inputValues = InputGenerator::getInstance().getInputValues16();
861 					conversionOk = validConversion(inputValues, outputValues, testLog);
862 				}
863 				break;
864 
865 			case FLOAT_TYPE_32_BITS:
866 				{
867 					auto& inputValues = InputGenerator::getInstance().getInputValues32();
868 					conversionOk = validConversion(inputValues, outputValues, testLog);
869 				}
870 				break;
871 
872 			default:
873 				DE_ASSERT(false);
874 				break;
875 			}
876 		}
877 		break;
878 
879 	default:
880 		DE_ASSERT(false);
881 		break;
882 	}
883 
884 	return (conversionOk ? tcu::TestStatus::pass("Pass") : tcu::TestStatus::fail("Fail"));
885 }
886 
887 } // anonymous
888 
createPrecisionFconvertGroup(tcu::TestContext & testCtx)889 tcu::TestCaseGroup*	createPrecisionFconvertGroup (tcu::TestContext& testCtx)
890 {
891 	tcu::TestCaseGroup* newGroup = new tcu::TestCaseGroup(testCtx, "precision_fconvert", "OpFConvert precision tests");
892 
893 	for (int i = 0; i < FLOAT_TYPE_MAX_ENUM; ++i)
894 	for (int j = 0; j < FLOAT_TYPE_MAX_ENUM; ++j)
895 	for (size_t k = kMinVectorLength; k <= kMaxVectorLength; ++k)
896 	{
897 		// No actual conversion if the types are the same.
898 		if (i == j)
899 			continue;
900 
901 		TestParams params = {
902 			static_cast<FloatType>(i),
903 			static_cast<FloatType>(j),
904 			k,
905 		};
906 
907 		std::string testName = std::string() + kFloatNames[i] + "_to_" + kFloatNames[j] + "_size_" + std::to_string(k);
908 		std::string testDescription = std::string("Conversion from ") + kFloatNames[i] + " to " + kFloatNames[j] + " with vectors of size " + std::to_string(k);
909 
910 		newGroup->addChild(new FConvertTestCase(testCtx, testName, testDescription, params));
911 	}
912 
913 	return newGroup;
914 }
915 
916 } // shaderexecutor
917 } // vkt
918