• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "gluShaderUtil.hpp"
40 
41 #include "tcuVector.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuTextureUtil.hpp"
44 
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deFloat16.h"
49 
50 #include <map>
51 #include <sstream>
52 #include <iostream>
53 
54 using std::vector;
55 using namespace vk;
56 
57 namespace vkt
58 {
59 namespace shaderexecutor
60 {
61 namespace
62 {
63 
// Default render target dimensions for the fragment-output executor; each
// executed value maps to one pixel of this target (see computeVertexPositions).
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};

// Common typedefs

typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;		// Shared, auto-destroyed image handle
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;	// Shared, auto-destroyed image view handle
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;		// Shared, auto-destroyed buffer handle
typedef de::SharedPtr<Allocation>			AllocationSp;	// Shared device memory allocation

// Forward declaration: used by FragmentOutExecutor's constructor before the definition below.
static VkFormat getAttributeFormat(const glu::DataType dataType);
78 
79 // Shader utilities
80 
getDefaultClearColor(void)81 static VkClearValue	getDefaultClearColor (void)
82 {
83 	return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
84 }
85 
generateEmptyFragmentSource(void)86 static std::string generateEmptyFragmentSource (void)
87 {
88 	std::ostringstream src;
89 
90 	src << "#version 450\n"
91 		   "layout(location=0) out highp vec4 o_color;\n";
92 
93 	src << "void main (void)\n{\n";
94 	src << "	o_color = vec4(0.0);\n";
95 	src << "}\n";
96 
97 	return src.str();
98 }
99 
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)100 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
101 {
102 	for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
103 	{
104 		if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
105 		{
106 			if(glu::isDataTypeVector(symIter->varType.getBasicType()))
107 			{
108 				for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
109 				{
110 					src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
111 				}
112 			}
113 			else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
114 			{
115 				int maxRow = 0;
116 				int maxCol = 0;
117 				switch (symIter->varType.getBasicType())
118 				{
119 				case glu::TYPE_FLOAT_MAT2:
120 					maxRow = maxCol = 2;
121 					break;
122 				case glu::TYPE_FLOAT_MAT2X3:
123 					maxRow = 2;
124 					maxCol = 3;
125 					break;
126 				case glu::TYPE_FLOAT_MAT2X4:
127 					maxRow = 2;
128 					maxCol = 4;
129 					break;
130 				case glu::TYPE_FLOAT_MAT3X2:
131 					maxRow = 3;
132 					maxCol = 2;
133 					break;
134 				case glu::TYPE_FLOAT_MAT3:
135 					maxRow = maxCol = 3;
136 					break;
137 				case glu::TYPE_FLOAT_MAT3X4:
138 					maxRow = 3;
139 					maxCol = 4;
140 					break;
141 				case glu::TYPE_FLOAT_MAT4X2:
142 					maxRow = 4;
143 					maxCol = 2;
144 					break;
145 				case glu::TYPE_FLOAT_MAT4X3:
146 					maxRow = 4;
147 					maxCol = 3;
148 					break;
149 				case glu::TYPE_FLOAT_MAT4:
150 					maxRow = maxCol = 4;
151 					break;
152 				default:
153 					DE_ASSERT(false);
154 					break;
155 				}
156 
157 				for(int i = 0; i < maxRow; i++)
158 				for(int j = 0; j < maxCol; j++)
159 				{
160 					src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
161 				}
162 			}
163 			else
164 			{
165 					src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
166 			}
167 		}
168 	}
169 }
170 
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)171 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
172 {
173 	std::ostringstream	src;
174 	int					location	= 0;
175 
176 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
177 
178 	if (!shaderSpec.globalDeclarations.empty())
179 		src << shaderSpec.globalDeclarations << "\n";
180 
181 	src << "layout(location = " << location << ") in highp vec4 a_position;\n";
182 
183 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
184 	{
185 		location++;
186 		src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
187 			<< "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
188 	}
189 
190 	src << "\nvoid main (void)\n{\n"
191 		<< "	gl_Position = a_position;\n"
192 		<< "	gl_PointSize = 1.0;\n";
193 
194 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
195 		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
196 
197 	src << "}\n";
198 
199 	return src.str();
200 }
201 
//! Builds the full vertex shader used when executing the shader spec in the
//! vertex stage: inputs are read from vertex attributes (location 0 is
//! reserved for a_position), the spec's source runs once per vertex, and the
//! results are written to flat varyings for the readback fragment shader.
//!
//! When shaderSpec.packFloat16Bit is set, float inputs/outputs are handled
//! as 16-bit-scalar locals (getDataTypeFloat16Scalars) and re-packed into
//! 32-bit "packed_" shadows before being written out (see packFloat16Bit()).
//! Bool outputs are converted to highp (i)ints, since booleans cannot be
//! used as shader interface variables.
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(location = 0) in highp vec4 a_position;\n";

	// Input attributes start at location 1 (0 is a_position).
	int			locationNumber	= 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	// Output varyings; bools are declared as highp (i)vec of int.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = a_position;\n"
		<< "	gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// 16-bit mode: convert the 32-bit attribute into a 16-bit-scalar local.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// 16-bit mode: the operation writes the 16-bit local; a 32-bit
			// "packed_" shadow receives the packed bits afterwards.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				// Convert bool results to the int varying type declared above.
				const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
307 
//! Mapping between the spec's output symbols and the fragment output /
//! render target locations they occupy (a symbol may span several locations,
//! e.g. matrix columns).
struct FragmentOutputLayout
{
	std::vector<const Symbol*>		locationSymbols;		//! Symbols by location
	std::map<std::string, int>		locationMap;			//! Map from symbol name to start location
};
313 
//! Declares the fragment shader outputs for the spec's output symbols, at
//! the locations given by outLocationMap:
//!  - float/vec outputs are declared as highp uints when useIntOutputs is
//!    set, so results can be read back bit-exactly,
//!  - bool/bvec outputs are declared as highp (i)ints,
//!  - matrix outputs are always split into one highp uint vector per column,
//!    named "<name>_<colNdx>", at consecutive locations.
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol&				output		= shaderSpec.outputs[outNdx];
		const int					location	= de::lookup(outLocationMap, output.name);
		const std::string			outVarName	= outputPrefix + output.name;
		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Bit-exact readback: declare the float output as uint(s).
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Booleans are not valid interface types; use int(s).
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One uint vector per column at consecutive locations.
			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name				= outVarName + "_" + de::toString(vecNdx);
				decl.layout.location	= location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
362 
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)363 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
364 {
365 	if (isInput16Bit)
366 		packFloat16Bit(src, shaderSpec.outputs);
367 
368 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
369 	{
370 		const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
371 
372 		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
373 			src << "	o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
374 		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
375 		{
376 			const int	numVecs		= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
377 
378 			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
379 				if (useIntOutputs)
380 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
381 				else
382 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
383 		}
384 		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
385 		{
386 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
387 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
388 
389 			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
390 		}
391 		else
392 			src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
393 	}
394 }
395 
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)396 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
397 {
398 	std::ostringstream	src;
399 
400 	src <<"#version 450\n";
401 
402 	if (!shaderSpec.globalDeclarations.empty())
403 		src << shaderSpec.globalDeclarations << "\n";
404 
405 	int locationNumber = 0;
406 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
407 	{
408 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
409 		{
410 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
411 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
412 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
413 
414 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
415 		}
416 		else
417 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
418 	}
419 
420 	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
421 
422 	src << "\nvoid main (void)\n{\n";
423 
424 	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
425 
426 	src << "}\n";
427 
428 	return src.str();
429 }
430 
//! Builds the geometry shader used when executing the spec in the geometry
//! stage: one input point per value, the spec source runs per primitive, and
//! results are written to flat varyings before emitting the single output
//! point. Bool outputs are converted to highp (i)ints as elsewhere.
//! pointSizeSupported controls whether gl_PointSize is forwarded.
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLES 3.1 needs the geometry shader extension; desktop/4xx versions have it in core.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Inputs arrive as per-vertex arrays (single-element for points).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Booleans are not valid interface types; use highp int(s).
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << "	EmitVertex();\n"
		<< "	EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
514 
//! Builds a fragment shader that executes the spec source itself: inputs
//! arrive as flat varyings, the operation runs per fragment, and results are
//! written out via generateFragShaderOutAssign(). Supports the 16-bit float
//! packing mode the same way generateVertexShader() does.
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	int			locationNumber	= 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// 16-bit mode: convert the 32-bit varying into a 16-bit-scalar local.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// 16-bit mode: 16-bit result local plus a 32-bit "packed_" shadow
			// filled by generateFragShaderOutAssign()/packFloat16Bit().
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
573 
574 // FragmentOutExecutor
575 
//! Shader executor that renders one point per input value and reads the
//! results back from fragment outputs. The stage the spec actually runs in
//! is recorded in m_shaderType (presumably specialized by subclasses —
//! the concrete pipelines are built outside this excerpt).
class FragmentOutExecutor : public ShaderExecutor
{
public:
														FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual												~FragmentOutExecutor	(void);

	// Runs the shader for numValues input values; extraResources supplies the
	// descriptor set matching m_extraResourcesLayout.
	virtual void										execute					(int					numValues,
																				 const void* const*		inputs,
																				 void* const*			outputs,
																				 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType								m_shaderType;
	const FragmentOutputLayout							m_outputLayout;			// Output symbol -> location mapping, fixed at construction.

private:
	// Creates vertex buffers/bindings for all inputs plus positions.
	void												bindAttributes			(int					numValues,
																				 const void* const*		inputs);

	// Appends one vertex binding + attribute and uploads its data to a new buffer.
	void												addAttribute			(deUint32				bindingLocation,
																				 VkFormat				format,
																				 deUint32				sizePerElement,
																				 deUint32				count,
																				 const void*			dataPtr);
	// reinit render data members
	virtual void										clearRenderData			(void);

	const VkDescriptorSetLayout							m_extraResourcesLayout;	// Not owned; provided by the caller.

	std::vector<VkVertexInputBindingDescription>		m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>		m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>								m_vertexBuffers;
	std::vector<AllocationSp>							m_vertexBufferAllocs;
};
610 
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)611 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
612 {
613 	FragmentOutputLayout	ret;
614 	int						location	= 0;
615 
616 	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
617 	{
618 		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
619 
620 		TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
621 		de::insert(ret.locationMap, it->name, location);
622 		location += numLocations;
623 
624 		for (int ndx = 0; ndx < numLocations; ++ndx)
625 			ret.locationSymbols.push_back(&*it);
626 	}
627 
628 	return ret;
629 }
630 
//! Precomputes the fragment output layout and verifies up front that every
//! input symbol's attribute format is usable as a vertex buffer format on
//! this device; throws NotSupportedError otherwise (cleaner than failing
//! later at pipeline creation).
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice = m_context.getPhysicalDevice();
	const InstanceInterface&	vki = m_context.getInstanceInterface();

	// Input attributes
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol = m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType = symbol.varType.getBasicType();
		const VkFormat				format = getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
651 
//! Nothing to release explicitly; the shared-pointer members (buffers,
//! allocations) clean up their resources.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
655 
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)656 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
657 {
658 	std::vector<tcu::Vec2> positions(numValues);
659 	for (int valNdx = 0; valNdx < numValues; valNdx++)
660 	{
661 		const int		ix		= valNdx % renderSize.x();
662 		const int		iy		= valNdx / renderSize.x();
663 		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
664 		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
665 
666 		positions[valNdx] = tcu::Vec2(fx, fy);
667 	}
668 
669 	return positions;
670 }
671 
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)672 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
673 {
674 	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
675 	{
676 		tcu::TextureFormat::R,
677 		tcu::TextureFormat::RG,
678 		tcu::TextureFormat::RGBA,	// No RGB variants available.
679 		tcu::TextureFormat::RGBA
680 	};
681 
682 	const glu::DataType					basicType		= outputType.getBasicType();
683 	const int							numComps		= glu::getDataTypeNumComponents(basicType);
684 	tcu::TextureFormat::ChannelType		channelType;
685 
686 	switch (glu::getDataTypeScalarType(basicType))
687 	{
688 		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;														break;
689 		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;															break;
690 		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;															break;
691 		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;			break;
692 		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;		break;
693 		default:
694 			throw tcu::InternalError("Invalid output type");
695 	}
696 
697 	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
698 
699 	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
700 }
701 
getAttributeFormat(const glu::DataType dataType)702 static VkFormat getAttributeFormat (const glu::DataType dataType)
703 {
704 	switch (dataType)
705 	{
706 		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
707 		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
708 		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
709 		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;
710 
711 		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
712 		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
713 		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
714 		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
715 
716 		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
717 		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
718 		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
719 		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;
720 
721 		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
722 		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
723 		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
724 		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;
725 
726 		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
727 		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
728 		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
729 		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
730 		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
731 		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
732 		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
733 		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
734 		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
735 		default:
736 			DE_ASSERT(false);
737 			return VK_FORMAT_UNDEFINED;
738 	}
739 }
740 
addAttribute(deUint32 bindingLocation,VkFormat format,deUint32 sizePerElement,deUint32 count,const void * dataPtr)741 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
742 {
743 	// Portability requires stride to be multiply of minVertexInputBindingStrideAlignment
744 	// this value is usually 4 and current tests meet this requirement but
745 	// if this changes in future then this limit should be verified in checkSupport
746 #ifndef CTS_USES_VULKANSC
747 	if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
748 		((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
749 	{
750 		DE_FATAL("stride is not multiply of minVertexInputBindingStrideAlignment");
751 	}
752 #endif // CTS_USES_VULKANSC
753 
754 	// Add binding specification
755 	const deUint32							binding = (deUint32)m_vertexBindingDescriptions.size();
756 	const VkVertexInputBindingDescription	bindingDescription =
757 	{
758 		binding,
759 		sizePerElement,
760 		VK_VERTEX_INPUT_RATE_VERTEX
761 	};
762 
763 	m_vertexBindingDescriptions.push_back(bindingDescription);
764 
765 	// Add location and format specification
766 	const VkVertexInputAttributeDescription attributeDescription =
767 	{
768 		bindingLocation,			// deUint32	location;
769 		binding,					// deUint32	binding;
770 		format,						// VkFormat	format;
771 		0u,							// deUint32	offsetInBytes;
772 	};
773 
774 	m_vertexAttributeDescriptions.push_back(attributeDescription);
775 
776 	// Upload data to buffer
777 	const VkDevice				vkDevice			= m_context.getDevice();
778 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
779 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
780 
781 	const VkDeviceSize			inputSize			= sizePerElement * count;
782 	const VkBufferCreateInfo	vertexBufferParams	=
783 	{
784 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
785 		DE_NULL,									// const void*			pNext;
786 		0u,											// VkBufferCreateFlags	flags;
787 		inputSize,									// VkDeviceSize			size;
788 		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
789 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
790 		1u,											// deUint32				queueFamilyCount;
791 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
792 	};
793 
794 	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
795 	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
796 
797 	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
798 
799 	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
800 	flushAlloc(vk, vkDevice, *alloc);
801 
802 	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
803 	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
804 }
805 
bindAttributes(int numValues,const void * const * inputs)806 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
807 {
808 	// Input attributes
809 	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
810 	{
811 		const Symbol&		symbol			= m_shaderSpec.inputs[inputNdx];
812 		const void*			ptr				= inputs[inputNdx];
813 		const glu::DataType	basicType		= symbol.varType.getBasicType();
814 		const int			vecSize			= glu::getDataTypeScalarSize(basicType);
815 		const VkFormat		format			= getAttributeFormat(basicType);
816 		int					elementSize		= 0;
817 		int					numAttrsToAdd	= 1;
818 
819 		if (glu::isDataTypeDoubleOrDVec(basicType))
820 			elementSize = sizeof(double);
821 		if (glu::isDataTypeFloatOrVec(basicType))
822 			elementSize = sizeof(float);
823 		else if (glu::isDataTypeFloat16OrVec(basicType))
824 			elementSize = sizeof(deUint16);
825 		else if (glu::isDataTypeIntOrIVec(basicType))
826 			elementSize = sizeof(int);
827 		else if (glu::isDataTypeUintOrUVec(basicType))
828 			elementSize = sizeof(deUint32);
829 		else if (glu::isDataTypeMatrix(basicType))
830 		{
831 			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
832 			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
833 
834 			elementSize = numRows * numCols * (int)sizeof(float);
835 			numAttrsToAdd = numCols;
836 		}
837 		else
838 			DE_ASSERT(false);
839 
840 		// add attributes, in case of matrix every column is binded as an attribute
841 		for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
842 		{
843 			addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
844 		}
845 	}
846 }
847 
clearRenderData(void)848 void FragmentOutExecutor::clearRenderData (void)
849 {
850 	m_vertexBindingDescriptions.clear();
851 	m_vertexAttributeDescriptions.clear();
852 	m_vertexBuffers.clear();
853 	m_vertexBufferAllocs.clear();
854 }
855 
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)856 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
857 {
858 	const VkDescriptorSetLayoutCreateInfo	createInfo	=
859 	{
860 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
861 		DE_NULL,
862 		(VkDescriptorSetLayoutCreateFlags)0,
863 		0u,
864 		DE_NULL,
865 	};
866 	return createDescriptorSetLayout(vkd, device, &createInfo);
867 }
868 
createEmptyDescriptorPool(const DeviceInterface & vkd,VkDevice device)869 static Move<VkDescriptorPool> createEmptyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
870 {
871 	const VkDescriptorPoolSize			emptySize	=
872 	{
873 		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
874 		1u,
875 	};
876 	const VkDescriptorPoolCreateInfo	createInfo	=
877 	{
878 		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
879 		DE_NULL,
880 		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
881 		1u,
882 		1u,
883 		&emptySize
884 	};
885 	return createDescriptorPool(vkd, device, &createInfo);
886 }
887 
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)888 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
889 {
890 	const VkDescriptorSetAllocateInfo	allocInfo	=
891 	{
892 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
893 		DE_NULL,
894 		pool,
895 		1u,
896 		&layout,
897 	};
898 	return allocateDescriptorSet(vkd, device, &allocInfo);
899 }
900 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)901 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
902 {
903 	const VkDevice										vkDevice				= m_context.getDevice();
904 	const DeviceInterface&								vk						= m_context.getDeviceInterface();
905 	const VkQueue										queue					= m_context.getUniversalQueue();
906 	const deUint32										queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();
907 	Allocator&											memAlloc				= m_context.getDefaultAllocator();
908 
909 	const deUint32										renderSizeX				= de::min(static_cast<deUint32>(128), (deUint32)numValues);
910 	const deUint32										renderSizeY				= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
911 	const tcu::UVec2									renderSize				(renderSizeX, renderSizeY);
912 	std::vector<tcu::Vec2>								positions;
913 
914 	const bool											useGeometryShader		= m_shaderType == glu::SHADERTYPE_GEOMETRY;
915 
916 	std::vector<VkImageSp>								colorImages;
917 	std::vector<VkImageMemoryBarrier>					colorImagePreRenderBarriers;
918 	std::vector<VkImageMemoryBarrier>					colorImagePostRenderBarriers;
919 	std::vector<AllocationSp>							colorImageAllocs;
920 	std::vector<VkAttachmentDescription>				attachments;
921 	std::vector<VkClearValue>							attachmentClearValues;
922 	std::vector<VkImageViewSp>							colorImageViews;
923 
924 	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
925 	std::vector<VkAttachmentReference>					colorAttachmentReferences;
926 
927 	Move<VkRenderPass>									renderPass;
928 	Move<VkFramebuffer>									framebuffer;
929 	Move<VkPipelineLayout>								pipelineLayout;
930 	Move<VkPipeline>									graphicsPipeline;
931 
932 	Move<VkShaderModule>								vertexShaderModule;
933 	Move<VkShaderModule>								geometryShaderModule;
934 	Move<VkShaderModule>								fragmentShaderModule;
935 
936 	Move<VkCommandPool>									cmdPool;
937 	Move<VkCommandBuffer>								cmdBuffer;
938 
939 	Unique<VkDescriptorSetLayout>						emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
940 	Unique<VkDescriptorPool>							emptyDescriptorPool			(createEmptyDescriptorPool(vk, vkDevice));
941 	Unique<VkDescriptorSet>								emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));
942 
943 	clearRenderData();
944 
945 	// Compute positions - 1px points are used to drive fragment shading.
946 	positions = computeVertexPositions(numValues, renderSize.cast<int>());
947 
948 	// Bind attributes
949 	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
950 	bindAttributes(numValues, inputs);
951 
952 	// Create color images
953 	{
954 		const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
955 		{
956 			VK_FALSE,																	// VkBool32						blendEnable;
957 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcColorBlendFactor;
958 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				dstColorBlendFactor;
959 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpColor;
960 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcAlphaBlendFactor;
961 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				destAlphaBlendFactor;
962 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpAlpha;
963 			(VK_COLOR_COMPONENT_R_BIT |
964 			 VK_COLOR_COMPONENT_G_BIT |
965 			 VK_COLOR_COMPONENT_B_BIT |
966 			 VK_COLOR_COMPONENT_A_BIT)													// VkColorComponentFlags		colorWriteMask;
967 		};
968 
969 		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
970 		{
971 			const bool		isDouble	= glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
972 			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
973 			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
974 			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
975 			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
976 			const VkFormat	colorFormat = (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));
977 
978 			{
979 				const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
980 				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
981 					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
982 			}
983 
984 			const VkImageCreateInfo	 colorImageParams =
985 			{
986 				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType				sType;
987 				DE_NULL,																	// const void*					pNext;
988 				0u,																			// VkImageCreateFlags			flags;
989 				VK_IMAGE_TYPE_2D,															// VkImageType					imageType;
990 				colorFormat,																// VkFormat						format;
991 				{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D					extent;
992 				1u,																			// deUint32						mipLevels;
993 				1u,																			// deUint32						arraySize;
994 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
995 				VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling				tiling;
996 				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags			usage;
997 				VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode				sharingMode;
998 				1u,																			// deUint32						queueFamilyCount;
999 				&queueFamilyIndex,															// const deUint32*				pQueueFamilyIndices;
1000 				VK_IMAGE_LAYOUT_UNDEFINED,													// VkImageLayout				initialLayout;
1001 			};
1002 
1003 			const VkAttachmentDescription colorAttachmentDescription =
1004 			{
1005 				0u,																			// VkAttachmentDescriptorFlags	flags;
1006 				colorFormat,																// VkFormat						format;
1007 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
1008 				VK_ATTACHMENT_LOAD_OP_CLEAR,												// VkAttachmentLoadOp			loadOp;
1009 				VK_ATTACHMENT_STORE_OP_STORE,												// VkAttachmentStoreOp			storeOp;
1010 				VK_ATTACHMENT_LOAD_OP_DONT_CARE,											// VkAttachmentLoadOp			stencilLoadOp;
1011 				VK_ATTACHMENT_STORE_OP_DONT_CARE,											// VkAttachmentStoreOp			stencilStoreOp;
1012 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				initialLayout;
1013 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				finalLayout;
1014 			};
1015 
1016 			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1017 			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1018 			attachmentClearValues.push_back(getDefaultClearColor());
1019 
1020 			// Allocate and bind color image memory
1021 			{
1022 				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1023 				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1024 				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1025 
1026 				attachments.push_back(colorAttachmentDescription);
1027 				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1028 
1029 				const VkAttachmentReference colorAttachmentReference =
1030 				{
1031 					(deUint32) (colorImages.size() - 1),			//	deUint32		attachment;
1032 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL		//	VkImageLayout	layout;
1033 				};
1034 
1035 				colorAttachmentReferences.push_back(colorAttachmentReference);
1036 			}
1037 
1038 			// Create color attachment view
1039 			{
1040 				const VkImageViewCreateInfo colorImageViewParams =
1041 				{
1042 					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
1043 					DE_NULL,											// const void*				pNext;
1044 					0u,													// VkImageViewCreateFlags	flags;
1045 					colorImages.back().get()->get(),					// VkImage					image;
1046 					VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
1047 					colorFormat,										// VkFormat					format;
1048 					{
1049 						VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
1050 						VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
1051 						VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
1052 						VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
1053 					},													// VkComponentMapping		components;
1054 					{
1055 						VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
1056 						0u,												// deUint32					baseMipLevel;
1057 						1u,												// deUint32					mipLevels;
1058 						0u,												// deUint32					baseArraySlice;
1059 						1u												// deUint32					arraySize;
1060 					}													// VkImageSubresourceRange	subresourceRange;
1061 				};
1062 
1063 				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1064 				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1065 
1066 				const VkImageMemoryBarrier	colorImagePreRenderBarrier =
1067 				{
1068 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1069 					DE_NULL,												// pNext
1070 					0u,														// srcAccessMask
1071 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1072 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// dstAccessMask
1073 					VK_IMAGE_LAYOUT_UNDEFINED,								// oldLayout
1074 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// newLayout
1075 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1076 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1077 					colorImages.back().get()->get(),						// image
1078 					{
1079 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1080 						0u,														// baseMipLevel
1081 						1u,														// levelCount
1082 						0u,														// baseArrayLayer
1083 						1u,														// layerCount
1084 					}														// subresourceRange
1085 				};
1086 				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1087 
1088 				const VkImageMemoryBarrier	colorImagePostRenderBarrier =
1089 				{
1090 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1091 					DE_NULL,												// pNext
1092 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1093 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// srcAccessMask
1094 					VK_ACCESS_TRANSFER_READ_BIT,							// dstAccessMask
1095 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// oldLayout
1096 					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,					// newLayout
1097 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1098 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1099 					colorImages.back().get()->get(),						// image
1100 					{
1101 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1102 						0u,														// baseMipLevel
1103 						1u,														// levelCount
1104 						0u,														// baseArrayLayer
1105 						1u,														// layerCount
1106 					}														// subresourceRange
1107 				};
1108 				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1109 			}
1110 		}
1111 	}
1112 
1113 	// Create render pass
1114 	{
1115 		const VkSubpassDescription subpassDescription =
1116 		{
1117 			0u,													// VkSubpassDescriptionFlags	flags;
1118 			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
1119 			0u,													// deUint32						inputCount;
1120 			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
1121 			(deUint32)colorImages.size(),						// deUint32						colorCount;
1122 			&colorAttachmentReferences[0],						// const VkAttachmentReference*	colorAttachments;
1123 			DE_NULL,											// const VkAttachmentReference*	resolveAttachments;
1124 			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
1125 			0u,													// deUint32						preserveCount;
1126 			DE_NULL												// const VkAttachmentReference*	pPreserveAttachments;
1127 		};
1128 
1129 		const VkRenderPassCreateInfo renderPassParams =
1130 		{
1131 			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
1132 			DE_NULL,											// const void*						pNext;
1133 			(VkRenderPassCreateFlags)0,							// VkRenderPassCreateFlags			flags;
1134 			(deUint32)attachments.size(),						// deUint32							attachmentCount;
1135 			&attachments[0],									// const VkAttachmentDescription*	pAttachments;
1136 			1u,													// deUint32							subpassCount;
1137 			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
1138 			0u,													// deUint32							dependencyCount;
1139 			DE_NULL												// const VkSubpassDependency*		pDependencies;
1140 		};
1141 
1142 		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1143 	}
1144 
1145 	// Create framebuffer
1146 	{
1147 		std::vector<VkImageView> views(colorImageViews.size());
1148 		for (size_t i = 0; i < colorImageViews.size(); i++)
1149 		{
1150 			views[i] = colorImageViews[i].get()->get();
1151 		}
1152 
1153 		const VkFramebufferCreateInfo framebufferParams =
1154 		{
1155 			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
1156 			DE_NULL,											// const void*					pNext;
1157 			0u,													// VkFramebufferCreateFlags		flags;
1158 			*renderPass,										// VkRenderPass					renderPass;
1159 			(deUint32)views.size(),								// deUint32						attachmentCount;
1160 			&views[0],											// const VkImageView*			pAttachments;
1161 			(deUint32)renderSize.x(),							// deUint32						width;
1162 			(deUint32)renderSize.y(),							// deUint32						height;
1163 			1u													// deUint32						layers;
1164 		};
1165 
1166 		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1167 	}
1168 
1169 	// Create pipeline layout
1170 	{
1171 		const VkDescriptorSetLayout			setLayouts[]			=
1172 		{
1173 			*emptyDescriptorSetLayout,
1174 			m_extraResourcesLayout
1175 		};
1176 		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
1177 		{
1178 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
1179 			DE_NULL,											// const void*					pNext;
1180 			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
1181 			(m_extraResourcesLayout != 0 ? 2u : 0u),			// deUint32						descriptorSetCount;
1182 			setLayouts,											// const VkDescriptorSetLayout*	pSetLayouts;
1183 			0u,													// deUint32						pushConstantRangeCount;
1184 			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
1185 		};
1186 
1187 		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1188 	}
1189 
1190 	// Create shaders
1191 	{
1192 		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1193 		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1194 
1195 		if (useGeometryShader)
1196 		{
1197 			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1198 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1199 			else
1200 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1201 		}
1202 	}
1203 
1204 	// Create pipeline
1205 	{
1206 		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1207 		{
1208 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
1209 			DE_NULL,													// const void*									pNext;
1210 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags		flags;
1211 			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32										bindingCount;
1212 			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
1213 			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32										attributeCount;
1214 			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*		pvertexAttributeDescriptions;
1215 		};
1216 
1217 		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
1218 		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));
1219 
1220 		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1221 		{
1222 			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
1223 			DE_NULL,														// const void*									pNext;
1224 			(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
1225 			VK_FALSE,														// VkBool32										logicOpEnable;
1226 			VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
1227 			(deUint32)colorBlendAttachmentStates.size(),					// deUint32										attachmentCount;
1228 			&colorBlendAttachmentStates[0],									// const VkPipelineColorBlendAttachmentState*	pAttachments;
1229 			{ 0.0f, 0.0f, 0.0f, 0.0f }										// float										blendConst[4];
1230 		};
1231 
1232 		graphicsPipeline = makeGraphicsPipeline(vk,														// const DeviceInterface&                        vk
1233 												vkDevice,												// const VkDevice                                device
1234 												*pipelineLayout,										// const VkPipelineLayout                        pipelineLayout
1235 												*vertexShaderModule,									// const VkShaderModule                          vertexShaderModule
1236 												DE_NULL,												// const VkShaderModule                          tessellationControlShaderModule
1237 												DE_NULL,												// const VkShaderModule                          tessellationEvalShaderModule
1238 												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule                          geometryShaderModule
1239 												*fragmentShaderModule,									// const VkShaderModule                          fragmentShaderModule
1240 												*renderPass,											// const VkRenderPass                            renderPass
1241 												viewports,												// const std::vector<VkViewport>&                viewports
1242 												scissors,												// const std::vector<VkRect2D>&                  scissors
1243 												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,						// const VkPrimitiveTopology                     topology
1244 												0u,														// const deUint32                                subpass
1245 												0u,														// const deUint32                                patchControlPoints
1246 												&vertexInputStateParams,								// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1247 												DE_NULL,												// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1248 												DE_NULL,												// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1249 												DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1250 												&colorBlendStateParams);								// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1251 	}
1252 
1253 	// Create command pool
1254 	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1255 
1256 	// Create command buffer
1257 	{
1258 		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1259 
1260 		beginCommandBuffer(vk, *cmdBuffer);
1261 
1262 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1263 							  0, (const VkMemoryBarrier*)DE_NULL,
1264 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1265 							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1266 		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1267 
1268 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1269 
1270 		if (m_extraResourcesLayout != 0)
1271 		{
1272 			DE_ASSERT(extraResources != 0);
1273 			const VkDescriptorSet	descriptorSets[]	= { *emptyDescriptorSet, extraResources };
1274 			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1275 		}
1276 		else
1277 			DE_ASSERT(extraResources == 0);
1278 
1279 		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1280 
1281 		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1282 
1283 		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1284 		for (size_t i = 0; i < numberOfVertexAttributes; i++)
1285 		{
1286 			buffers[i] = m_vertexBuffers[i].get()->get();
1287 		}
1288 
1289 		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1290 		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1291 
1292 		endRenderPass(vk, *cmdBuffer);
1293 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1294 							  0, (const VkMemoryBarrier*)DE_NULL,
1295 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1296 							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1297 
1298 		endCommandBuffer(vk, *cmdBuffer);
1299 	}
1300 
1301 	// Execute Draw
1302 	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1303 
1304 	// Read back result and output
1305 	{
1306 		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1307 		const VkBufferCreateInfo readImageBufferParams =
1308 		{
1309 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1310 			DE_NULL,									// const void*			pNext;
1311 			0u,											// VkBufferCreateFlags	flags;
1312 			imageSizeBytes,								// VkDeviceSize			size;
1313 			VK_BUFFER_USAGE_TRANSFER_DST_BIT,			// VkBufferUsageFlags	usage;
1314 			VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1315 			1u,											// deUint32				queueFamilyCount;
1316 			&queueFamilyIndex,							// const deUint32*		pQueueFamilyIndices;
1317 		};
1318 
1319 		// constants for image copy
1320 		Move<VkCommandPool>	copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1321 
1322 		const VkBufferImageCopy copyParams =
1323 		{
1324 			0u,											// VkDeviceSize			bufferOffset;
1325 			(deUint32)renderSize.x(),					// deUint32				bufferRowLength;
1326 			(deUint32)renderSize.y(),					// deUint32				bufferImageHeight;
1327 			{
1328 				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspect		aspect;
1329 				0u,										// deUint32				mipLevel;
1330 				0u,										// deUint32				arraySlice;
1331 				1u,										// deUint32				arraySize;
1332 			},											// VkImageSubresource	imageSubresource;
1333 			{ 0u, 0u, 0u },								// VkOffset3D			imageOffset;
1334 			{ renderSize.x(), renderSize.y(), 1u }		// VkExtent3D			imageExtent;
1335 		};
1336 
1337 		// Read back pixels.
1338 		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1339 		{
1340 			const Symbol&				output			= m_shaderSpec.outputs[outNdx];
1341 			const int					outSize			= output.varType.getScalarSize();
1342 			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
1343 			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
1344 			const int					outLocation		= de::lookup(m_outputLayout.locationMap, output.name);
1345 
1346 			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1347 			{
1348 				tcu::TextureLevel			tmpBuf;
1349 				const tcu::TextureFormat	format = getRenderbufferFormatForOutput(output.varType, false);
1350 				const tcu::TextureFormat	readFormat (tcu::TextureFormat::RGBA, format.type);
1351 				const Unique<VkBuffer>		readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1352 				const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1353 
1354 				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1355 
1356 				// Copy image to buffer
1357 				{
1358 
1359 					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1360 
1361 					beginCommandBuffer(vk, *copyCmdBuffer);
1362 					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1363 
1364 					// Insert a barrier so data written by the transfer is available to the host
1365 					{
1366 						const VkBufferMemoryBarrier barrier =
1367 						{
1368 							VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
1369 							DE_NULL,									// const void*        pNext;
1370 							VK_ACCESS_TRANSFER_WRITE_BIT,				// VkAccessFlags      srcAccessMask;
1371 							VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
1372 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
1373 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
1374 							*readImageBuffer,							// VkBuffer           buffer;
1375 							0,											// VkDeviceSize       offset;
1376 							VK_WHOLE_SIZE,								// VkDeviceSize       size;
1377 						};
1378 
1379 						vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1380 											0, (const VkMemoryBarrier*)DE_NULL,
1381 											1, &barrier,
1382 											0, (const VkImageMemoryBarrier*)DE_NULL);
1383 					}
1384 
1385 					endCommandBuffer(vk, *copyCmdBuffer);
1386 
1387 					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1388 				}
1389 
1390 				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1391 
1392 				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1393 
1394 				const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1395 				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1396 
1397 				tcu::copy(tmpBuf.getAccess(), resultAccess);
1398 
1399 				if (isOutput16Bit(static_cast<size_t>(outNdx)))
1400 				{
1401 					deUint16*	dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1402 					if (outSize == 4 && outNumLocs == 1)
1403 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1404 					else
1405 					{
1406 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1407 						{
1408 							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1409 							deUint16*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1410 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1411 						}
1412 					}
1413 				}
1414 				else
1415 				{
1416 					deUint32*	dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1417 					if (outSize == 4 && outNumLocs == 1)
1418 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1419 					else
1420 					{
1421 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1422 						{
1423 							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1424 							deUint32*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1425 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1426 						}
1427 					}
1428 				}
1429 			}
1430 		}
1431 	}
1432 }
1433 
1434 // VertexShaderExecutor
1435 
// Executes the shader expression in the vertex stage. Outputs are forwarded
// through "vtx_out_" varyings and written to color attachments by a
// passthrough fragment shader; readback is handled by FragmentOutExecutor.
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
								VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~VertexShaderExecutor	(void);

	// Registers the "vert"/"frag" GLSL sources for this spec into dst.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
};

VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}

VertexShaderExecutor::~VertexShaderExecutor (void)
{
}

void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));

	// The vertex shader evaluates the expression ("a_" inputs -> "vtx_out_" varyings);
	// the fragment shader only copies varyings to the "o_" color outputs.
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
}
1462 
1463 // GeometryShaderExecutor
1464 
// Executes the shader expression in the geometry stage. A passthrough vertex
// shader feeds "vtx_out_" varyings to the geometry shader, which emits
// "geom_out_" varyings consumed by a passthrough fragment shader.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
								GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~GeometryShaderExecutor	(void);

	// Registers "vert", "geom", "geom_point_size" and "frag" GLSL sources.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};

GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
{
	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();

	// Geometry shaders are an optional Vulkan feature; bail out early if absent.
	if (!features.geometryShader)
		TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
}

GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}

void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));

	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;

	// Two geometry shader variants: without and with gl_PointSize writes.
	programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;

	/* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;

}
1501 
1502 // FragmentShaderExecutor
1503 
// Executes the shader expression directly in the fragment stage; the vertex
// shader only passes attributes through as "vtx_out_" varyings.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
								FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~FragmentShaderExecutor (void);

	// Registers the "vert"/"frag" GLSL sources for this spec.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};

FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}

FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}

void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));

	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
	// Here the fragment shader itself evaluates the expression and writes "o_" outputs.
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
}
1531 
1532 // Shared utilities for compute and tess executors
1533 
getVecStd430ByteAlignment(glu::DataType type)1534 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1535 {
1536 	deUint32 baseSize;
1537 
1538 	switch (glu::getDataTypeScalarType(type))
1539 	{
1540 		case glu::TYPE_FLOAT16:	baseSize = 2u; break;
1541 		case glu::TYPE_DOUBLE:	baseSize = 8u; break;
1542 		default:				baseSize = 4u; break;
1543 	}
1544 
1545 	switch (glu::getDataTypeScalarSize(type))
1546 	{
1547 		case 1:		return baseSize;
1548 		case 2:		return baseSize * 2u;
1549 		case 3:		// fallthrough.
1550 		case 4:		return baseSize * 4u;
1551 		default:
1552 			DE_ASSERT(false);
1553 			return 0u;
1554 	}
1555 }
1556 
// Base class for executors that pass values through SSBOs (compute and
// tessellation stages): inputs are uploaded to one storage buffer, the shader
// writes results to another, and results are read back on the host.
class BufferIoExecutor : public ShaderExecutor
{
public:
							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
	virtual					~BufferIoExecutor	(void);

protected:
	// Fixed descriptor bindings used by the generated shader source.
	enum
	{
		INPUT_BUFFER_BINDING	= 0,
		OUTPUT_BUFFER_BINDING	= 1,
	};

	// Creates and binds both buffers, sized for numValues elements.
	void					initBuffers			(int numValues);
	VkBuffer				getInputBuffer		(void) const		{ return *m_inputBuffer;					}
	VkBuffer				getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
	deUint32				getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
	deUint32				getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}

	// Copies host-side values into the input buffer / out of the output buffer.
	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	void					readOutputBuffer	(void* const* outputPtrs, int numValues);

	// GLSL source generation helpers shared by the derived executors.
	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer>			m_inputBuffer;
	Move<VkBuffer>			m_outputBuffer;

private:
	// std430 placement of one variable within an element: byte offset, array
	// stride (size of a whole element), and column stride for matrices.
	struct VarLayout
	{
		deUint32		offset;
		deUint32		stride;
		deUint32		matrixStride;

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);

	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation>	m_inputAlloc;
	de::MovePtr<Allocation>	m_outputAlloc;

	vector<VarLayout>		m_inputLayout;
	vector<VarLayout>		m_outputLayout;
};
1608 
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	// Precompute std430 layouts for both directions; the buffers themselves
	// are created later in initBuffers() once the value count is known.
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}

BufferIoExecutor::~BufferIoExecutor (void)
{
}

// Every variable in a layout shares the same stride (the aligned size of one
// whole element), so the first entry -- or 0 for an empty layout -- suffices.
inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
{
	return layout.empty() ? 0 : layout[0].stride;
}
1624 
computeVarLayout(const std::vector<Symbol> & symbols,std::vector<VarLayout> * layout)1625 void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
1626 {
1627 	deUint32	maxAlignment	= 0;
1628 	deUint32	curOffset		= 0;
1629 
1630 	DE_ASSERT(layout != DE_NULL);
1631 	DE_ASSERT(layout->empty());
1632 	layout->resize(symbols.size());
1633 
1634 	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1635 	{
1636 		const Symbol&		symbol		= symbols[varNdx];
1637 		const glu::DataType	basicType	= symbol.varType.getBasicType();
1638 		VarLayout&			layoutEntry	= (*layout)[varNdx];
1639 
1640 		if (glu::isDataTypeScalarOrVector(basicType))
1641 		{
1642 			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
1643 			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1644 
1645 			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
1646 			maxAlignment	= de::max(maxAlignment, alignment);
1647 
1648 			layoutEntry.offset			= curOffset;
1649 			layoutEntry.matrixStride	= 0;
1650 
1651 			curOffset += size;
1652 		}
1653 		else if (glu::isDataTypeMatrix(basicType))
1654 		{
1655 			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
1656 			const glu::DataType		vecType			= glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
1657 			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);
1658 
1659 			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
1660 			maxAlignment	= de::max(maxAlignment, vecAlignment);
1661 
1662 			layoutEntry.offset			= curOffset;
1663 			layoutEntry.matrixStride	= vecAlignment;
1664 
1665 			curOffset += vecAlignment*numVecs;
1666 		}
1667 		else
1668 			DE_ASSERT(false);
1669 	}
1670 
1671 	{
1672 		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);
1673 
1674 		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1675 			varIter->stride = totalSize;
1676 	}
1677 }
1678 
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1679 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1680 {
1681 	// Input struct
1682 	if (!spec.inputs.empty())
1683 	{
1684 		glu::StructType inputStruct("Inputs");
1685 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1686 			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1687 		src << glu::declare(&inputStruct) << ";\n";
1688 	}
1689 
1690 	// Output struct
1691 	{
1692 		glu::StructType outputStruct("Outputs");
1693 		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1694 			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1695 		src << glu::declare(&outputStruct) << ";\n";
1696 	}
1697 
1698 	src << "\n";
1699 
1700 	if (!spec.inputs.empty())
1701 	{
1702 		src	<< "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1703 			<< "{\n"
1704 			<< "	Inputs inputs[];\n"
1705 			<< "};\n";
1706 	}
1707 
1708 	src	<< "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1709 		<< "{\n"
1710 		<< "	Outputs outputs[];\n"
1711 		<< "};\n"
1712 		<< "\n";
1713 }
1714 
// Emits the GLSL statement sequence that (1) loads each input from the input
// SSBO into a local variable, (2) declares a local for each output, (3)
// inlines the user-provided operation source, and (4) stores the outputs back
// to the output SSBO, all indexed by invocationNdxName.
void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
{
	std::string	tname;
	// Load inputs. When packFloat16Bit is set, float inputs are converted to
	// float16 locals via an explicit constructor-style cast.
	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if (f16BitTest)
		{
			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		}
		else
		{
			tname = glu::getDataTypeName(symIter->varType.getBasicType());
		}
		src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
	}

	// Declare output locals. float16 outputs additionally get a "packed_"
	// shadow in the original type, filled by packFloat16Bit() below.
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if (f16BitTest)
		{
			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		}
		else
		{
			tname = glu::getDataTypeName(symIter->varType.getBasicType());
		}
		src << "\t" << tname << " " << symIter->name << ";\n";
		if (f16BitTest)
		{
			const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
			src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
		}
	}

	src << "\n";

	// Inline the operation source, indenting each line by one tab.
	{
		std::istringstream	opSrc	(spec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (spec.packFloat16Bit)
		packFloat16Bit (src, spec.outputs);

	// Store results; packed outputs are written from their "packed_" shadows.
	src << "\n";
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if(f16BitTest)
			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
		else
			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
	}
}
1774 
// Interleaves numValues tightly-packed host values (srcBasePtr) into the
// std430-laid-out buffer (dstBasePtr) described by layout: stride bytes per
// value, matrixStride bytes per matrix column. Only basic types (scalars,
// vectors, matrices) are supported.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType		basicType		= varType.getBasicType();
		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
		const int				numComps		= scalarSize / numVecs;
		// Bytes per scalar component: 8 for doubles, 2 for fp16 types, else 4.
		const int				size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				const int		srcOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
				const int		dstOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					// Note: here size is 4 (32-bit float source), and the pair of
					// deFloat16 values is zero-initialized, so copying `size` bytes
					// writes the converted half in the low 16 bits and zeros above it.
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1815 
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1816 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1817 {
1818 	if (varType.isBasicType())
1819 	{
1820 		const glu::DataType		basicType		= varType.getBasicType();
1821 		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1822 		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1823 		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1824 		const int				numComps		= scalarSize / numVecs;
1825 
1826 		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1827 		{
1828 			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1829 			{
1830 				const int		size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1831 				const int		srcOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1832 				const int		dstOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
1833 				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1834 				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1835 
1836 				deMemcpy(dstPtr, srcPtr, size * numComps);
1837 			}
1838 		}
1839 	}
1840 	else
1841 		throw tcu::InternalError("Unsupported type");
1842 }
1843 
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1844 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1845 {
1846 	const VkDevice			vkDevice			= m_context.getDevice();
1847 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1848 
1849 	const deUint32			inputStride			= getLayoutStride(m_inputLayout);
1850 	const int				inputBufferSize		= inputStride * numValues;
1851 
1852 	if (inputBufferSize == 0)
1853 		return; // No inputs
1854 
1855 	DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1856 	for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1857 	{
1858 		const glu::VarType&		varType		= m_shaderSpec.inputs[inputNdx].varType;
1859 		const VarLayout&		layout		= m_inputLayout[inputNdx];
1860 
1861 		copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1862 	}
1863 
1864 	flushAlloc(vk, vkDevice, *m_inputAlloc);
1865 }
1866 
readOutputBuffer(void * const * outputPtrs,int numValues)1867 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1868 {
1869 	const VkDevice			vkDevice			= m_context.getDevice();
1870 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1871 
1872 	DE_ASSERT(numValues > 0); // At least some outputs are required.
1873 
1874 	invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1875 
1876 	DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1877 	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1878 	{
1879 		const glu::VarType&		varType		= m_shaderSpec.outputs[outputNdx].varType;
1880 		const VarLayout&		layout		= m_outputLayout[outputNdx];
1881 
1882 		copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1883 	}
1884 }
1885 
initBuffers(int numValues)1886 void BufferIoExecutor::initBuffers (int numValues)
1887 {
1888 	const deUint32				inputStride			= getLayoutStride(m_inputLayout);
1889 	const deUint32				outputStride		= getLayoutStride(m_outputLayout);
1890 	// Avoid creating zero-sized buffer/memory
1891 	const size_t				inputBufferSize		= de::max(numValues * inputStride, 1u);
1892 	const size_t				outputBufferSize	= numValues * outputStride;
1893 
1894 	// Upload data to buffer
1895 	const VkDevice				vkDevice			= m_context.getDevice();
1896 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
1897 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1898 	Allocator&					memAlloc			= m_context.getDefaultAllocator();
1899 
1900 	const VkBufferCreateInfo inputBufferParams =
1901 	{
1902 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1903 		DE_NULL,									// const void*			pNext;
1904 		0u,											// VkBufferCreateFlags	flags;
1905 		inputBufferSize,							// VkDeviceSize			size;
1906 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1907 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1908 		1u,											// deUint32				queueFamilyCount;
1909 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1910 	};
1911 
1912 	m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1913 	m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1914 
1915 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1916 
1917 	const VkBufferCreateInfo outputBufferParams =
1918 	{
1919 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1920 		DE_NULL,									// const void*			pNext;
1921 		0u,											// VkBufferCreateFlags	flags;
1922 		outputBufferSize,							// VkDeviceSize			size;
1923 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1924 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1925 		1u,											// deUint32				queueFamilyCount;
1926 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1927 	};
1928 
1929 	m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1930 	m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1931 
1932 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1933 }
1934 
1935 // ComputeShaderExecutor
1936 
// Executes the shader expression in a compute shader, passing values through
// the SSBOs managed by BufferIoExecutor.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~ComputeShaderExecutor	(void);

	// Registers the compute GLSL source for this spec.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateComputeShader	(const ShaderSpec& spec);

private:
	// Layout for caller-supplied extra resources; the matching descriptor set
	// is passed to execute().
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};

ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}

ComputeShaderExecutor::~ComputeShaderExecutor	(void)
{
}
1963 
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1964 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1965 {
1966 	switch(type)
1967 	{
1968 	case glu::TYPE_FLOAT16:
1969 		return "%f16";
1970 	case glu::TYPE_FLOAT16_VEC2:
1971 		return "%v2f16";
1972 	case glu::TYPE_FLOAT16_VEC3:
1973 		return "%v3f16";
1974 	case glu::TYPE_FLOAT16_VEC4:
1975 		return "%v4f16";
1976 	case glu::TYPE_FLOAT:
1977 		return packFloat16Bit ? "%u32" : "%f32";		// f16 values will be bitcast from ui32.
1978 	case glu::TYPE_FLOAT_VEC2:
1979 		return packFloat16Bit ? "%v2u32" : "%v2f32";	// f16 values will be bitcast from ui32.
1980 	case glu::TYPE_FLOAT_VEC3:
1981 		return packFloat16Bit ? "%v3u32" : "%v3f32";	// f16 values will be bitcast from ui32.
1982 	case glu::TYPE_FLOAT_VEC4:
1983 		return packFloat16Bit ? "%v4u32" : "%v4f32";	// f16 values will be bitcast from ui32.
1984 	case glu::TYPE_INT:
1985 		return "%i32";
1986 	case glu::TYPE_INT_VEC2:
1987 		return "%v2i32";
1988 	case glu::TYPE_INT_VEC3:
1989 		return "%v3i32";
1990 	case glu::TYPE_INT_VEC4:
1991 		return "%v4i32";
1992 	case glu::TYPE_DOUBLE:
1993 		return "%f64";
1994 	case glu::TYPE_DOUBLE_VEC2:
1995 		return "%v2f64";
1996 	case glu::TYPE_DOUBLE_VEC3:
1997 		return "%v3f64";
1998 	case glu::TYPE_DOUBLE_VEC4:
1999 		return "%v4f64";
2000 	default:
2001 		DE_ASSERT(0);
2002 		return "";
2003 	}
2004 }
2005 
// Returns SPIR-V assembly that shifts the i32 variable `variableName` left by
// one bit (load, OpShiftLeftLogical by %c_i32_1, store). operationNdx makes
// the emitted result ids unique.
std::string moveBitOperation (std::string variableName, const int operationNdx)
{
	const std::string	ndx		= std::to_string(operationNdx);
	std::string			result;

	result += "\n";
	result += "%operation_move_" + ndx + " = OpLoad %i32 " + variableName + "\n";
	result += "%move1_" + ndx + " = OpShiftLeftLogical %i32 %operation_move_" + ndx + " %c_i32_1\n";
	result += "OpStore " + variableName + " %move1_" + ndx + "\n";

	return result;
}
2015 
// Emits SPIR-V assembly that applies the float comparison `operation` to
// %in0_val / %in1_val and accumulates the current %operation marker value into
// %out0 where the comparison holds. For scalar types this is a conditional
// branch adding %operation to %out0; for vectors, OpSelect builds a 1/0 mask
// that is multiplied by a broadcast of %operation and added component-wise.
// operationNdx keeps the emitted result ids unique; outputType is the SPIR-V
// id of the output vector type (e.g. "%v2i32"), whose name (minus '%') is
// also used to reference the %c_<type>_1 / %c_<type>_0 constants.
std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
{
	std::ostringstream	src;
	std::string			boolType;

	switch (type)
	{
	// Scalar case: branch on the bool result and add %operation to %out0.
	case glu::TYPE_FLOAT16:
	case glu::TYPE_FLOAT:
	case glu::TYPE_DOUBLE:
		src << "\n"
			<< "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
			<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
			<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
			<< "%label_IF_" << operationNdx << " = OpLabel\n"
			<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
			<< "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
			<< "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
			<< "OpStore %out0 %add_if_" << operationNdx << "\n"
			<< "OpBranch %IF_" << operationNdx << "\n"
			<< "%IF_" << operationNdx << " = OpLabel\n";
		return src.str();
	// Vector cases only pick the matching bool vector type here.
	case glu::TYPE_FLOAT16_VEC2:
	case glu::TYPE_FLOAT_VEC2:
	case glu::TYPE_DOUBLE_VEC2:
		boolType = "%v2bool";
		break;
	case glu::TYPE_FLOAT16_VEC3:
	case glu::TYPE_FLOAT_VEC3:
	case glu::TYPE_DOUBLE_VEC3:
		boolType = "%v3bool";
		break;
	case glu::TYPE_FLOAT16_VEC4:
	case glu::TYPE_FLOAT_VEC4:
	case glu::TYPE_DOUBLE_VEC4:
		boolType = "%v4bool";
		break;
	default:
		DE_ASSERT(0);
		return "";
	}

	// Vector case: OpSelect turns the bool vector into 1/0 integers
	// (&outputType[1] strips the leading '%' to form the constant names).
	src << "\n"
		<< "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
		<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
		<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";

	// Broadcast the scalar %operation value into a vector of scalarSize lanes.
	src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
	for(int ndx = 0; ndx < scalarSize; ++ndx)
		src << " %operation_val_" << operationNdx;
	src << "\n";

	// Mask the broadcast with the 1/0 result and accumulate into %out0.
	src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
		<< "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"

		<< "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
		<< "OpStore %out0 %add_if_" << operationNdx << "\n";

	return src.str();
}
2076 
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2077 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2078 {
2079 	static const std::string COMPARE_OPERATIONS[] =
2080 	{
2081 		"OpFOrdEqual",
2082 		"OpFOrdGreaterThan",
2083 		"OpFOrdLessThan",
2084 		"OpFOrdGreaterThanEqual",
2085 		"OpFOrdLessThanEqual",
2086 		"OpFUnordEqual",
2087 		"OpFUnordGreaterThan",
2088 		"OpFUnordLessThan",
2089 		"OpFUnordGreaterThanEqual",
2090 		"OpFUnordLessThanEqual"
2091 	};
2092 
2093 	int					moveBitNdx		= 0;
2094 	vector<std::string>	inputTypes;
2095 	vector<std::string>	outputTypes;
2096 	const std::string	packType		= spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2097 
2098 	vector<bool>		floatResult;
2099 	for (const auto& symbol : spec.outputs)
2100 		floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2101 
2102 	const bool			anyFloatResult	= std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2103 
2104 	vector<bool>		packFloatRes;
2105 	for (const auto& floatRes : floatResult)
2106 		packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2107 
2108 	const bool			useF32Types		= (!are16Bit && !are64Bit);
2109 	const bool			useF64Types		= are64Bit;
2110 	const bool			useF16Types		= (spec.packFloat16Bit || are16Bit);
2111 
2112 	for (const auto& symbol : spec.inputs)
2113 		inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2114 
2115 	for (const auto& symbol : spec.outputs)
2116 		outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2117 
2118 	DE_ASSERT(!inputTypes.empty());
2119 	DE_ASSERT(!outputTypes.empty());
2120 
2121 	// Assert input and output types match the expected operations.
2122 	switch (spec.spirvCase)
2123 	{
2124 	case SPIRV_CASETYPE_COMPARE:
2125 	case SPIRV_CASETYPE_FREM:
2126 		DE_ASSERT(inputTypes.size() == 2);
2127 		DE_ASSERT(outputTypes.size() == 1);
2128 		break;
2129 	case SPIRV_CASETYPE_MODFSTRUCT:
2130 	case SPIRV_CASETYPE_FREXPSTRUCT:
2131 		DE_ASSERT(inputTypes.size() == 1);
2132 		DE_ASSERT(outputTypes.size() == 2);
2133 		break;
2134 	default:
2135 		DE_ASSERT(false);
2136 		break;
2137 	}
2138 
2139 	std::ostringstream	src;
2140 	src << "; SPIR-V\n"
2141 		"; Version: 1.0\n"
2142 		"; Generator: Khronos Glslang Reference Front End; 4\n"
2143 		"; Bound: 114\n"
2144 		"; Schema: 0\n"
2145 		"OpCapability Shader\n";
2146 
2147 	if (useF16Types)
2148 		src << "OpCapability Float16\n";
2149 
2150 	if (are16Bit)
2151 		src << "OpCapability StorageBuffer16BitAccess\n"
2152 			"OpCapability UniformAndStorageBuffer16BitAccess\n";
2153 
2154 	if (useF64Types)
2155 		src << "OpCapability Float64\n";
2156 
2157 	if (are16Bit)
2158 		src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2159 
2160 	src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2161 		"OpMemoryModel Logical GLSL450\n"
2162 		"OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2163 		"OpExecutionMode %BP_main LocalSize 1 1 1\n"
2164 		"OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2165 		"OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2166 
2167 	// Input offsets and stride.
2168 	{
2169 		int offset	= 0;
2170 		int ndx		= 0;
2171 		int largest	= 0;
2172 		for (const auto& symbol : spec.inputs)
2173 		{
2174 			const int scalarSize		= symbol.varType.getScalarSize();
2175 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2176 			const int extraMemberBytes	= (offset % memberSize);
2177 
2178 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2179 			src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2180 			++ndx;
2181 
2182 			if (memberSize > largest)
2183 				largest = memberSize;
2184 
2185 			offset += memberSize;
2186 		}
2187 		DE_ASSERT(largest > 0);
2188 		const int extraBytes	= (offset % largest);
2189 		const int stride		= offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2190 		src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2191 	}
2192 
2193 	src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2194 		"OpDecorate %ssboIN BufferBlock\n"
2195 		"OpDecorate %ssbo_src DescriptorSet 0\n"
2196 		"OpDecorate %ssbo_src Binding 0\n"
2197 		"\n";
2198 
2199 	if (isMediump)
2200 	{
2201 		for (size_t i = 0; i < inputTypes.size(); ++i)
2202 		{
2203 			src <<
2204 				"OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2205 				"OpDecorate %in" << i << " RelaxedPrecision\n"
2206 				"OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2207 				"OpDecorate %in" << i << "_val RelaxedPrecision\n"
2208 				;
2209 		}
2210 
2211 			if (anyFloatResult)
2212 			{
2213 				switch (spec.spirvCase)
2214 				{
2215 				case SPIRV_CASETYPE_FREM:
2216 					src << "OpDecorate %frem_result RelaxedPrecision\n";
2217 					break;
2218 				case SPIRV_CASETYPE_MODFSTRUCT:
2219 					src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2220 					break;
2221 				case SPIRV_CASETYPE_FREXPSTRUCT:
2222 					src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2223 					break;
2224 				default:
2225 					DE_ASSERT(false);
2226 					break;
2227 				}
2228 
2229 				for (size_t i = 0; i < outputTypes.size(); ++i)
2230 				{
2231 					src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2232 					src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2233 					src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2234 				}
2235 			}
2236 	}
2237 
2238 	// Output offsets and stride.
2239 	{
2240 		int offset	= 0;
2241 		int ndx		= 0;
2242 		int largest	= 0;
2243 		for (const auto& symbol : spec.outputs)
2244 		{
2245 			const int scalarSize		= symbol.varType.getScalarSize();
2246 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2247 			const int extraMemberBytes	= (offset % memberSize);
2248 
2249 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2250 			src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2251 			++ndx;
2252 
2253 			if (memberSize > largest)
2254 				largest = memberSize;
2255 
2256 			offset += memberSize;
2257 		}
2258 		DE_ASSERT(largest > 0);
2259 		const int extraBytes	= (offset % largest);
2260 		const int stride		= offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2261 		src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2262 	}
2263 
2264 	src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2265 		"OpDecorate %ssboOUT BufferBlock\n"
2266 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
2267 		"OpDecorate %ssbo_dst Binding 1\n"
2268 		"\n"
2269 		"%void  = OpTypeVoid\n"
2270 		"%bool  = OpTypeBool\n"
2271 		"%v2bool = OpTypeVector %bool 2\n"
2272 		"%v3bool = OpTypeVector %bool 3\n"
2273 		"%v4bool = OpTypeVector %bool 4\n"
2274 		"%u32   = OpTypeInt 32 0\n";
2275 
2276 	if (useF32Types)
2277 		src << "%f32   = OpTypeFloat 32\n"
2278 			"%v2f32 = OpTypeVector %f32 2\n"
2279 			"%v3f32 = OpTypeVector %f32 3\n"
2280 			"%v4f32 = OpTypeVector %f32 4\n";
2281 
2282 	if (useF64Types)
2283 		src << "%f64   = OpTypeFloat 64\n"
2284 			"%v2f64 = OpTypeVector %f64 2\n"
2285 			"%v3f64 = OpTypeVector %f64 3\n"
2286 			"%v4f64 = OpTypeVector %f64 4\n";
2287 
2288 	if (useF16Types)
2289 		src << "%f16   = OpTypeFloat 16\n"
2290 			"%v2f16 = OpTypeVector %f16 2\n"
2291 			"%v3f16 = OpTypeVector %f16 3\n"
2292 			"%v4f16 = OpTypeVector %f16 4\n";
2293 
2294 	src << "%i32   = OpTypeInt 32 1\n"
2295 		"%v2i32 = OpTypeVector %i32 2\n"
2296 		"%v3i32 = OpTypeVector %i32 3\n"
2297 		"%v4i32 = OpTypeVector %i32 4\n"
2298 		"%v2u32 = OpTypeVector %u32 2\n"
2299 		"%v3u32 = OpTypeVector %u32 3\n"
2300 		"%v4u32 = OpTypeVector %u32 4\n"
2301 		"\n"
2302 		"%ip_u32   = OpTypePointer Input %u32\n"
2303 		"%ip_v3u32 = OpTypePointer Input %v3u32\n"
2304 		"%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2305 		"\n"
2306 		"%fp_operation = OpTypePointer Function %i32\n"
2307 		"%voidf        = OpTypeFunction %void\n"
2308 		"%fp_u32       = OpTypePointer Function %u32\n"
2309 		"%fp_it1       = OpTypePointer Function " << inputTypes[0] << "\n"
2310 		;
2311 
2312 	for (size_t i = 0; i < outputTypes.size(); ++i)
2313 	{
2314 		src << "%fp_out_" << i << "     = OpTypePointer Function " << outputTypes[i] << "\n"
2315 			<< "%up_out_" << i << "     = OpTypePointer Uniform " << outputTypes[i] << "\n";
2316 	}
2317 
2318 	if (spec.packFloat16Bit)
2319 		src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2320 
2321 	src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2322 		"%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2323 		"\n"
2324 		"%c_u32_0 = OpConstant %u32 0\n"
2325 		"%c_u32_1 = OpConstant %u32 1\n"
2326 		"%c_u32_2 = OpConstant %u32 2\n"
2327 		"%c_i32_0 = OpConstant %i32 0\n"
2328 		"%c_i32_1 = OpConstant %i32 1\n"
2329 		"\n";
2330 
2331 	if (useF32Types)
2332 		src <<
2333 			"%c_f32_0 = OpConstant %f32 0\n"
2334 			"%c_f32_1 = OpConstant %f32 1\n"
2335 			;
2336 
2337 	if (useF16Types)
2338 		src <<
2339 			"%c_f16_0 = OpConstant %f16 0\n"
2340 			"%c_f16_1 = OpConstant %f16 1\n"
2341 			"%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2342 			;
2343 
2344 	if (useF64Types)
2345 		src <<
2346 			"%c_f64_0 = OpConstant %f64 0\n"
2347 			"%c_f64_1 = OpConstant %f64 1\n"
2348 		;
2349 
2350 	src << "\n"
2351 		"%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2352 		"%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2353 		"%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2354 		"%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2355 		"%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2356 		"%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2357 		"\n";
2358 
2359 	if (useF32Types)
2360 		src <<
2361 			"%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2362 			"%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2363 			"%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2364 			"%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2365 			"%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2366 			"%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2367 			;
2368 
2369 	if (useF16Types)
2370 		src <<
2371 			"%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2372 			"%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2373 			"%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2374 			"%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2375 			"%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2376 			"%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2377 			;
2378 
2379 	if (useF64Types)
2380 		src <<
2381 			"%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2382 			"%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2383 			"%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2384 			"%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2385 			"%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2386 			"%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2387 			"\n";
2388 
2389 	// Input struct.
2390 	{
2391 		src << "%SSB0_IN    = OpTypeStruct";
2392 		for (const auto& t : inputTypes)
2393 			src << " " << t;
2394 		src << "\n";
2395 	}
2396 
2397 	src <<
2398 		"%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2399 		"%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2400 		"%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2401 		"%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2402 		"\n";
2403 
2404 	// Output struct.
2405 	{
2406 		src << "%SSB0_OUT    = OpTypeStruct";
2407 		for (const auto& t : outputTypes)
2408 			src << " " << t;
2409 		src << "\n";
2410 	}
2411 
2412 	std::string modfStructMemberType;
2413 	std::string frexpStructFirstMemberType;
2414 	if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2415 	{
2416 		modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2417 		src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2418 	}
2419 	else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2420 	{
2421 		frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2422 		src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2423 	}
2424 
2425 	src <<
2426 		"%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2427 		"%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2428 		"%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2429 		"%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2430 		"\n"
2431 		"%BP_main = OpFunction %void None %voidf\n"
2432 		"%BP_label = OpLabel\n"
2433 		"%invocationNdx = OpVariable %fp_u32 Function\n";
2434 
2435 	// Note: here we are supposing all inputs have the same type.
2436 	for (size_t i = 0; i < inputTypes.size(); ++i)
2437 		src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2438 
2439 	for (size_t i = 0; i < outputTypes.size(); ++i)
2440 		src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2441 
2442 	src << "%operation = OpVariable %fp_operation Function\n"
2443 		"%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2444 		"%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2445 		"%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2446 		"%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2447 		"%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2448 		"%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2449 		"%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2450 		"%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2451 		"%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2452 		"%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2453 		"\n"
2454 		"%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2455 		"%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2456 		"%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2457 		"%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2458 		"%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2459 		"OpStore %invocationNdx %add_2\n"
2460 		"%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2461 
2462 	// Load input values.
2463 	for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2464 	{
2465 		src << "\n"
2466 			<< "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2467 			<< "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2468 
2469 		if (spec.packFloat16Bit)
2470 		{
2471 			if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2472 			{
2473 				// Extract the val<inputNdx> u32 input channels into individual f16 values.
2474 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2475 				{
2476 					src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2477 						"%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2478 						"%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2479 				}
2480 
2481 				// Construct the input vector.
2482 				src << "%val_f16_0_" << inputNdx << "   = OpCompositeConstruct " << packType;
2483 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2484 				{
2485 					src << " %val_f16_0_" << inputNdx << "_" << i;
2486 				}
2487 
2488 				src << "\n";
2489 				src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2490 			}
2491 			else
2492 			{
2493 				src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2494 					"%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2495 
2496 				src <<	"OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2497 			}
2498 		}
2499 		else
2500 			src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2501 
2502 		src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2503 	}
2504 
2505 	src << "\n"
2506 		"OpStore %operation %c_i32_1\n";
2507 
2508 	// Fill output values with dummy data.
2509 	for (size_t i = 0; i < outputTypes.size(); ++i)
2510 		src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2511 
2512 	src << "\n";
2513 
2514 	// Run operation.
2515 	switch (spec.spirvCase)
2516 	{
2517 	case SPIRV_CASETYPE_COMPARE:
2518 		for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2519 		{
2520 			src << scalarComparison	(COMPARE_OPERATIONS[operationNdx], operationNdx,
2521 									spec.inputs[0].varType.getBasicType(),
2522 									outputTypes[0],
2523 									spec.outputs[0].varType.getScalarSize());
2524 			src << moveBitOperation("%operation", moveBitNdx);
2525 			++moveBitNdx;
2526 		}
2527 		break;
2528 	case SPIRV_CASETYPE_FREM:
2529 		src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2530 			<< "OpStore %out0 %frem_result\n";
2531 		break;
2532 	case SPIRV_CASETYPE_MODFSTRUCT:
2533 		src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2534 			<< "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2535 			<< "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2536 			<< "OpStore %out0 %modfstruct_result_0\n"
2537 			<< "OpStore %out1 %modfstruct_result_1\n";
2538 		break;
2539 	case SPIRV_CASETYPE_FREXPSTRUCT:
2540 		src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2541 			<< "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2542 			<< "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2543 			<< "OpStore %out0 %frexpstruct_result_0\n"
2544 			<< "OpStore %out1 %frexpstruct_result_1\n";
2545 		break;
2546 	default:
2547 		DE_ASSERT(false);
2548 		break;
2549 	}
2550 
2551 	for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2552 	{
2553 		src << "\n"
2554 			"%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2555 			"%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2556 
2557 		if (packFloatRes[outputNdx])
2558 		{
2559 			if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2560 			{
2561 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2562 				{
2563 					src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2564 					src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2565 					src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2566 				}
2567 
2568 				src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2569 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2570 					src << " %u32_val_" << outputNdx << "_" << i;
2571 				src << "\n";
2572 				src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2573 			}
2574 			else
2575 			{
2576 				src <<
2577 					"%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2578 					"%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2579 					"OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2580 			}
2581 		}
2582 		else
2583 		{
2584 			src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2585 		}
2586 	}
2587 
2588 	src << "\n"
2589 		"OpReturn\n"
2590 		"OpFunctionEnd\n";
2591 
2592 	return src.str();
2593 }
2594 
2595 
generateComputeShader(const ShaderSpec & spec)2596 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2597 {
2598 	if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2599 	{
2600 		bool	are16Bit	= false;
2601 		bool	are64Bit	= false;
2602 		bool	isMediump	= false;
2603 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2604 		{
2605 			if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2606 				are16Bit = true;
2607 
2608 			if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2609 				are64Bit = true;
2610 
2611 			if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2612 				isMediump = true;
2613 
2614 			if (isMediump && are16Bit)
2615 				break;
2616 		}
2617 
2618 		return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2619 	}
2620 	else
2621 	{
2622 		std::ostringstream src;
2623 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2624 
2625 		if (!spec.globalDeclarations.empty())
2626 			src << spec.globalDeclarations << "\n";
2627 
2628 		src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2629 			<< "\n";
2630 
2631 		declareBufferBlocks(src, spec);
2632 
2633 		src << "void main (void)\n"
2634 			<< "{\n"
2635 			<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2636 			<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2637 
2638 		generateExecBufferIo(src, spec, "invocationNdx");
2639 
2640 		src << "}\n";
2641 
2642 		return src.str();
2643 	}
2644 }
2645 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2646 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2647 {
2648 	if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2649 		programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2650 	else
2651 		programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2652 }
2653 
// Runs the compute shader over numValues input values and reads back the results.
// Values are processed in chunks of at most maxComputeWorkGroupSize[0], dispatching
// one workgroup per value; extraResources (if any) is bound as descriptor set 1.
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// An extra resources layout must come with a matching descriptor set and vice versa.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer

	// Two storage buffer bindings: input (binding 0) and output (binding 1).
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout			descriptorSetLayouts[]	=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						descriptorSetCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
				DE_NULL,													// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,						// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,								// VkShaderStageFlagsBit				stage;
				*computeShaderModule,										// VkShaderModule						shader;
				"main",														// const char*							pName;
				DE_NULL														// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			*shaderStageParams,									// VkPipelineShaderStageCreateInfo					cs;
			*pipelineLayout,									// VkPipelineLayout									layout;
			0u,													// VkPipeline										basePipelineHandle;
			0u,													// int32_t											basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// One value per workgroup, so each dispatch is limited by the max workgroup count in X.
	const int			maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset				= 0;
	const deUint32		inputStride				= getInputStride();
	const deUint32		outputStride			= getOutputStride();

	// Process the values in chunks, recording and submitting one command buffer per chunk.
	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>	cmdBuffer;
		const int				numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			// Point the bindings at the sub-ranges of the buffers covering this chunk.
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// inputStride is zero when the spec has no inputs; skip the input binding then.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								0, (const VkMemoryBarrier*)DE_NULL,
								1, &bufferBarrier,
								0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2843 
2844 #ifndef CTS_USES_VULKANSC
2845 // MeshTaskShaderExecutor
2846 
// Executes a ShaderSpec through a mesh-shading graphics pipeline; shader
// inputs and outputs travel through the storage buffers managed by
// BufferIoExecutor. Compiled out for Vulkan SC builds.
class MeshTaskShaderExecutor : public BufferIoExecutor
{
public:
						MeshTaskShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~MeshTaskShaderExecutor	(void);

	// Registers the "mesh" (and, when useTask is true, "task") GLSL sources.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection, bool useTask);

	// Runs numValues invocations, reading from inputs and writing results to outputs.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateMeshShader		(const ShaderSpec& spec, bool useTask);
	static std::string	generateTaskShader		(const ShaderSpec& spec);

private:
	// Layout for the optional extra descriptor set (may be DE_NULL).
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
2864 
// Constructor: stores the optional extra-resources layout alongside the
// common buffer I/O state set up by BufferIoExecutor.
MeshTaskShaderExecutor::MeshTaskShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
2870 
// Destructor: no resources beyond those owned by BufferIoExecutor.
MeshTaskShaderExecutor::~MeshTaskShaderExecutor (void)
{
}
2874 
generateMeshShader(const ShaderSpec & spec,bool useTask)2875 std::string MeshTaskShaderExecutor::generateMeshShader (const ShaderSpec& spec, bool useTask)
2876 {
2877 	DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
2878 
2879 	std::ostringstream src;
2880 
2881 	if (useTask)
2882 	{
2883 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2884 			<< "#extension GL_EXT_mesh_shader : enable\n"
2885 			<< "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2886 			<< "layout(points) out;\n"
2887 			<< "layout(max_vertices=1, max_primitives=1) out;\n"
2888 			<< "\n"
2889 			<< "void main (void)\n"
2890 			<< "{\n"
2891 			<< "    SetMeshOutputsEXT(0u, 0u);\n"
2892 			<< "}\n";
2893 	}
2894 	else
2895 	{
2896 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2897 			<< "#extension GL_EXT_mesh_shader : enable\n";
2898 
2899 		if (!spec.globalDeclarations.empty())
2900 			src << spec.globalDeclarations << "\n";
2901 
2902 		src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2903 			<< "layout(points) out;\n"
2904 			<< "layout(max_vertices=1, max_primitives=1) out;\n"
2905 			<< "\n";
2906 
2907 		declareBufferBlocks(src, spec);
2908 
2909 		src << "void main (void)\n"
2910 			<< "{\n"
2911 			<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2912 			<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2913 
2914 		generateExecBufferIo(src, spec, "invocationNdx");
2915 
2916 		src << "	SetMeshOutputsEXT(0u, 0u);\n"
2917 			<< "}\n";
2918 	}
2919 
2920 	return src.str();
2921 }
2922 
generateTaskShader(const ShaderSpec & spec)2923 std::string MeshTaskShaderExecutor::generateTaskShader (const ShaderSpec& spec)
2924 {
2925 	std::ostringstream src;
2926 
2927 	src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2928 		<< "#extension GL_EXT_mesh_shader : enable\n";
2929 
2930 	if (!spec.globalDeclarations.empty())
2931 		src << spec.globalDeclarations << "\n";
2932 
2933 	src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2934 		<< "\n";
2935 
2936 	declareBufferBlocks(src, spec);
2937 
2938 	src << "void main (void)\n"
2939 		<< "{\n"
2940 		<< "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2941 		<< "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2942 
2943 	generateExecBufferIo(src, spec, "invocationNdx");
2944 
2945 	src << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
2946 		<< "}\n";
2947 
2948 	return src.str();
2949 }
2950 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection,bool useTask)2951 void MeshTaskShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection, bool useTask)
2952 {
2953 	DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
2954 	programCollection.glslSources.add("mesh") << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
2955 	if (useTask)
2956 		programCollection.glslSources.add("task") << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
2957 }
2958 
// Executes numValues invocations through a mesh-only (or task+mesh) graphics
// pipeline. Work is split into batches; each value in a batch is handled by
// one drawn workgroup, and the descriptor set is rewritten each round to
// point at the matching sub-range of the input/output buffers. Results are
// read back into outputs once all batches have completed.
void MeshTaskShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const auto	vkDevice			= m_context.getDevice();
	const auto&	vk					= m_context.getDeviceInterface();
	const auto	queue				= m_context.getUniversalQueue();
	const auto	queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	const auto	bindPoint			= VK_PIPELINE_BIND_POINT_GRAPHICS;
	const auto&	binaries			= m_context.getBinaryCollection();
	// The presence of a "task" binary decides which stage does the buffer I/O.
	const bool	useTask				= binaries.contains("task");
	const auto	shaderStage			= (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
	const auto	pipelineStage		= (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);

	// An extra resources set requires a matching layout, and vice versa.
	DE_ASSERT((m_extraResourcesLayout != DE_NULL) == (extraResources != DE_NULL));

	// Create input and output buffers.
	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Descriptor pool, set layout and set.
	DescriptorPoolBuilder		descriptorPoolBuilder;
	DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

	// Two storage-buffer bindings in the first set: one for the output buffer
	// and one for the input buffer (OUTPUT_BUFFER_BINDING / INPUT_BUFFER_BINDING).
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	const auto descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
	const auto descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
	const auto descriptorSet		= makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());

	// Create pipeline layout; the extra resources set layout is appended only
	// when one was provided.
	std::vector<VkDescriptorSetLayout> setLayouts;
	setLayouts.push_back(descriptorSetLayout.get());
	if (m_extraResourcesLayout != DE_NULL)
		setLayouts.push_back(m_extraResourcesLayout);

	const auto pipelineLayout = makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));

	// Create shaders
	const auto meshShaderModule = createShaderModule(vk, vkDevice, binaries.get("mesh"));
	const auto taskShaderModule = (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());

	// Render pass and framebuffer (1x1, zero attachments: the shaders only
	// write to the storage buffers).
	const auto fbExtent		= makeExtent2D(1u, 1u);
	const auto renderPass	= makeRenderPass(vk, vkDevice);
	const auto framebuffer	= makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);

	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));

	// Create pipeline.
	const auto meshPipeline	= makeGraphicsPipeline(
		vk, vkDevice, pipelineLayout.get(),
		taskShaderModule.get(), meshShaderModule.get(), DE_NULL,
		renderPass.get(), viewports, scissors);

	// NOTE(review): the batch size is capped by maxMeshWorkGroupSize[0], but one
	// workgroup is drawn per value below, so maxMeshWorkGroupCount[0] looks like
	// the relevant limit for the draw dimension -- confirm against the spec.
	const int		maxValuesPerInvocation	= m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
	const uint32_t	inputStride				= getInputStride();
	const uint32_t	outputStride			= getOutputStride();
	const auto		outputBufferBinding		= DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
	const auto		inputBufferBinding		= DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
	int				curOffset				= 0;

	// Process the values in batches, rebinding buffer sub-ranges each round.
	while (curOffset < numValues)
	{
		const auto remaining = numValues - curOffset;
		const auto numToExec = de::min(maxValuesPerInvocation, remaining);

		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const auto outputDescriptorBufferInfo = makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
			descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// The input binding is only written when the shader actually has inputs.
			if (inputStride)
			{
				const auto inputDescriptorBufferInfo = makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
				descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		std::vector<VkDescriptorSet> descriptorSets;
		descriptorSets.push_back(descriptorSet.get());
		if (extraResources != DE_NULL)
			descriptorSets.push_back(extraResources);

		// Barrier making the shader's output-buffer writes visible to the host.
		const auto bufferBarrier	= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
		const auto cmdBufferPtr		= allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		const auto cmdBuffer		= cmdBufferPtr.get();

		// Record command buffer, including pipeline barrier from output buffer to the host.
		beginCommandBuffer(vk, cmdBuffer);
		beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
		vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
		vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u, DE_NULL);
		// One workgroup per value in this batch.
		vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
		endRenderPass(vk, cmdBuffer);
		cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
		endCommandBuffer(vk, cmdBuffer);

		// Execute
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);

		curOffset += numToExec;
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3080 #endif // CTS_USES_VULKANSC
3081 
3082 // Tessellation utils
3083 
// Returns GLSL for the passthrough vertex shader used by the tessellation
// executors; the position is derived directly from gl_VertexIndex.
static std::string generateVertexShaderForTess (void)
{
	std::ostringstream shader;

	shader << "#version 450\n";
	shader << "void main (void)\n{\n";
	shader << "\tgl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n";
	shader << "}\n";

	return shader.str();
}
3094 
// Common base for the tessellation-stage executors. Provides renderTess(),
// which draws patches through a full graphics pipeline so the tessellation
// shaders can perform the buffer I/O.
class TessellationExecutor : public BufferIoExecutor
{
public:
					TessellationExecutor		(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual			~TessellationExecutor		(void);

	// Draws vertexCount vertices as patches of patchControlPoints control
	// points; numValues sizes the input buffer binding.
	void			renderTess					(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Layout for the optional extra descriptor set (may be DE_NULL).
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
3106 
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3107 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3108 	: BufferIoExecutor			(context, shaderSpec)
3109 	, m_extraResourcesLayout	(extraResourcesLayout)
3110 {
3111 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
3112 
3113 	if (!features.tessellationShader)
3114 		TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
3115 }
3116 
// Destructor: no resources beyond those owned by BufferIoExecutor.
TessellationExecutor::~TessellationExecutor (void)
{
}
3120 
// Draws vertexCount vertices as patches of patchControlPoints control points
// into a throwaway 100x100 color image. The test result is whatever the
// tessellation shaders store in m_outputBuffer; the color attachment only
// exists to form a complete graphics pipeline. numValues sizes the input
// buffer binding. Blocks until the GPU has finished so the caller may read
// the output buffer immediately afterwards.
void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
{
	const size_t						inputBufferSize				= numValues * getInputStride();
	const VkDevice						vkDevice					= m_context.getDevice();
	const DeviceInterface&				vk							= m_context.getDeviceInterface();
	const VkQueue						queue						= m_context.getUniversalQueue();
	const deUint32						queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
	Allocator&							memAlloc					= m_context.getDefaultAllocator();

	const tcu::UVec2					renderSize					(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

	Move<VkImage>						colorImage;
	de::MovePtr<Allocation>				colorImageAlloc;
	VkFormat							colorFormat					= VK_FORMAT_R8G8B8A8_UNORM;
	Move<VkImageView>					colorImageView;

	Move<VkRenderPass>					renderPass;
	Move<VkFramebuffer>					framebuffer;
	Move<VkPipelineLayout>				pipelineLayout;
	Move<VkPipeline>					graphicsPipeline;

	Move<VkShaderModule>				vertexShaderModule;
	Move<VkShaderModule>				tessControlShaderModule;
	Move<VkShaderModule>				tessEvalShaderModule;
	Move<VkShaderModule>				fragmentShaderModule;

	Move<VkCommandPool>					cmdPool;
	Move<VkCommandBuffer>				cmdBuffer;

	Move<VkDescriptorPool>				descriptorPool;
	Move<VkDescriptorSetLayout>			descriptorSetLayout;
	Move<VkDescriptorSet>				descriptorSet;
	// The extra resources set is bound only when a layout for it was provided.
	const deUint32						numDescriptorSets			= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// An extra resources set requires a matching layout, and vice versa.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create color image
	{
		const VkImageCreateInfo colorImageParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType			sType;
			DE_NULL,																	// const void*				pNext;
			0u,																			// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,															// VkImageType				imageType;
			colorFormat,																// VkFormat					format;
			{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D				extent;
			1u,																			// deUint32					mipLevels;
			1u,																			// deUint32					arrayLayers;
			VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling			tiling;
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode			sharingMode;
			1u,																			// deUint32					queueFamilyIndexCount;
			&queueFamilyIndex,															// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED													// VkImageLayout			initialLayout;
		};

		colorImage = createImage(vk, vkDevice, &colorImageParams);

		// Allocate and bind color image memory
		colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
		VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
	}

	// Create color attachment view
	{
		const VkImageViewCreateInfo colorImageViewParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
			DE_NULL,											// const void*				pNext;
			0u,													// VkImageViewCreateFlags	flags;
			*colorImage,										// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
			colorFormat,										// VkFormat					format;
			{
				VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
				VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
				VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
				VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
			},													// VkComponentsMapping		components;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
				0u,												// deUint32					baseMipLevel;
				1u,												// deUint32					levelCount;
				0u,												// deUint32					baseArrayLayer;
				1u												// deUint32					layerCount;
			}													// VkImageSubresourceRange	subresourceRange;
		};

		colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
	}

	// Create render pass
	{
		const VkAttachmentDescription colorAttachmentDescription =
		{
			0u,													// VkAttachmentDescriptionFlags	flags;
			colorFormat,										// VkFormat						format;
			VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits		samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR,						// VkAttachmentLoadOp			loadOp;
			VK_ATTACHMENT_STORE_OP_STORE,						// VkAttachmentStoreOp			storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE,					// VkAttachmentLoadOp			stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE,					// VkAttachmentStoreOp			stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout				initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout				finalLayout
		};

		const VkAttachmentDescription attachments[1] =
		{
			colorAttachmentDescription
		};

		const VkAttachmentReference colorAttachmentReference =
		{
			0u,													// deUint32			attachment;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout	layout;
		};

		const VkSubpassDescription subpassDescription =
		{
			0u,													// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
			0u,													// deUint32						inputAttachmentCount;
			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
			1u,													// deUint32						colorAttachmentCount;
			&colorAttachmentReference,							// const VkAttachmentReference*	pColorAttachments;
			DE_NULL,											// const VkAttachmentReference*	pResolveAttachments;
			DE_NULL,											// const VkAttachmentReference*	pDepthStencilAttachment;
			0u,													// deUint32						preserveAttachmentCount;
			DE_NULL												// const deUint32*				pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
			DE_NULL,											// const void*						pNext;
			0u,													// VkRenderPassCreateFlags			flags;
			1u,													// deUint32							attachmentCount;
			attachments,										// const VkAttachmentDescription*	pAttachments;
			1u,													// deUint32							subpassCount;
			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
			0u,													// deUint32							dependencyCount;
			DE_NULL												// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			0u,													// VkFramebufferCreateFlags		flags;
			*renderPass,										// VkRenderPass					renderPass;
			1u,													// deUint32						attachmentCount;
			&*colorImageView,									// const VkImageView*			pAttachments;
			(deUint32)renderSize.x(),							// deUint32						width;
			(deUint32)renderSize.y(),							// deUint32						height;
			1u													// deUint32						layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create descriptors: two storage-buffer bindings (output and input).
	{
		DescriptorPoolBuilder		descriptorPoolBuilder;
		DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
		descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		const VkDescriptorSetAllocateInfo allocInfo =
		{
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			DE_NULL,
			*descriptorPool,
			1u,
			&*descriptorSetLayout
		};

		descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				0u,								// VkDeviceSize		offset;
				VK_WHOLE_SIZE					// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			VkDescriptorBufferInfo inputDescriptorBufferInfo =
			{
				0,							// VkBuffer			buffer;
				0u,							// VkDeviceSize		offset;
				VK_WHOLE_SIZE				// VkDeviceSize		range;
			};

			// The input binding is only written when the shader actually has inputs.
			if (inputBufferSize > 0)
			{
				inputDescriptorBufferInfo.buffer = *m_inputBuffer;

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}
	}

	// Create pipeline layout. The second layout slot may be DE_NULL, but
	// numDescriptorSets is 1 in that case, so it is never consumed.
	{
		const VkDescriptorSetLayout			descriptorSetLayouts[]		=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shader modules
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		tessControlShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
		tessEvalShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
	}

	// Create pipeline
	{
		// No vertex attributes: the vertex shader works from gl_VertexIndex only.
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags	flags;
			0u,																// deUint32									bindingCount;
			DE_NULL,														// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,																// deUint32									attributeCount;
			DE_NULL,														// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&                        vk
												vkDevice,							// const VkDevice                                device
												*pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
												*vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
												*tessControlShaderModule,			// const VkShaderModule                          tessellationControlShaderModule
												*tessEvalShaderModule,				// const VkShaderModule                          tessellationEvalShaderModule
												DE_NULL,							// const VkShaderModule                          geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule                          fragmentShaderModule
												*renderPass,						// const VkRenderPass                            renderPass
												viewports,							// const std::vector<VkViewport>&                viewports
												scissors,							// const std::vector<VkRect2D>&                  scissors
												VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,	// const VkPrimitiveTopology                     topology
												0u,									// const deUint32                                subpass
												patchControlPoints,					// const deUint32                                patchControlPoints
												&vertexInputStateParams);			// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		const VkClearValue clearValue = getDefaultClearColor();

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		{
			// Second array entry may be DE_NULL, but numDescriptorSets guards it.
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

		endRenderPass(vk, *cmdBuffer);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			// NOTE(review): srcStageMask is the tess-eval stage only; the execution
			// dependency covers logically earlier stages, but the SHADER_WRITE access
			// scope applies only to the stages in the mask, so writes performed in
			// the tessellation *control* shader may need
			// VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT here -- confirm.
			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
3453 
3454 // TessControlExecutor
3455 
// Executor that runs the ShaderSpec inside a tessellation control shader;
// one single-vertex patch is drawn per value and gl_PrimitiveID selects
// the work item.
class TessControlExecutor : public TessellationExecutor
{
public:
						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessControlExecutor		(void);

	// Registers the vertex, tess-control, tess-eval and fragment sources.
	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Runs numValues invocations, reading inputs and writing results to outputs.
	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
};
3469 
// Constructor: simply forwards to TessellationExecutor, which also verifies
// tessellation support.
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3474 
// Destructor: nothing beyond the base class cleanup.
TessControlExecutor::~TessControlExecutor (void)
{
}
3478 
generateTessControlShader(const ShaderSpec & shaderSpec)3479 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3480 {
3481 	std::ostringstream src;
3482 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3483 
3484 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3485 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3486 
3487 	if (!shaderSpec.globalDeclarations.empty())
3488 		src << shaderSpec.globalDeclarations << "\n";
3489 
3490 	src << "\nlayout(vertices = 1) out;\n\n";
3491 
3492 	declareBufferBlocks(src, shaderSpec);
3493 
3494 	src << "void main (void)\n{\n";
3495 
3496 	for (int ndx = 0; ndx < 2; ndx++)
3497 		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3498 
3499 	for (int ndx = 0; ndx < 4; ndx++)
3500 		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3501 
3502 	src << "\n"
3503 		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3504 
3505 	generateExecBufferIo(src, shaderSpec, "invocationId");
3506 
3507 	src << "}\n";
3508 
3509 	return src.str();
3510 }
3511 
// Minimal tessellation evaluation shader: forwards the tessellation
// coordinate as the vertex position and carries no test data.
static std::string generateEmptyTessEvalShader ()
{
	std::string src;

	src += "#version 450\n";
	src += "#extension GL_EXT_tessellation_shader : require\n\n";
	src += "layout(triangles, ccw) in;\n";
	src += "\nvoid main (void)\n{\n";
	src += "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n";
	src += "}\n";

	return src;
}
3527 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3528 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3529 {
3530 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3531 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
3532 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
3533 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3534 }
3535 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3536 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3537 {
3538 	const deUint32	patchSize	= 3;
3539 
3540 	initBuffers(numValues);
3541 
3542 	// Setup input buffer & copy data
3543 	uploadInputBuffer(inputs, numValues, false);
3544 
3545 	renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3546 
3547 	// Read back data
3548 	readOutputBuffer(outputs, numValues);
3549 }
3550 
3551 // TessEvaluationExecutor
3552 
// Runs the shader expressions under test in a tessellation evaluation shader;
// a passthrough control stage feeds it (see generateSources below).
class TessEvaluationExecutor : public TessellationExecutor
{
public:
						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessEvaluationExecutor	(void);

	// Adds vertex, passthrough tess control, generated tess eval and empty fragment shaders.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Executes numValues invocations and copies results into 'outputs'.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the isolines evaluation shader that performs the buffer I/O.
	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
};
3566 
TessEvaluationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3567 TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3568 	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
3569 {
3570 }
3571 
// Out-of-line destructor; body intentionally empty.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3575 
// Minimal tessellation control shader: a single output vertex per patch with
// every tessellation level forced to 1.0; no test data flows through it.
static std::string generatePassthroughTessControlShader (void)
{
	std::string src;

	src += "#version 450\n";
	src += "#extension GL_EXT_tessellation_shader : require\n\n";
	src += "layout(vertices = 1) out;\n\n";
	src += "void main (void)\n{\n";

	for (int levelNdx = 0; levelNdx < 2; ++levelNdx)
		src += "\tgl_TessLevelInner[" + std::to_string(levelNdx) + "] = 1.0;\n";

	for (int levelNdx = 0; levelNdx < 4; ++levelNdx)
		src += "\tgl_TessLevelOuter[" + std::to_string(levelNdx) + "] = 1.0;\n";

	src += "}\n";

	return src;
}
3597 
generateTessEvalShader(const ShaderSpec & shaderSpec)3598 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3599 {
3600 	std::ostringstream src;
3601 
3602 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3603 
3604 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3605 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3606 
3607 	if (!shaderSpec.globalDeclarations.empty())
3608 		src << shaderSpec.globalDeclarations << "\n";
3609 
3610 	src << "\n";
3611 
3612 	src << "layout(isolines, equal_spacing) in;\n\n";
3613 
3614 	declareBufferBlocks(src, shaderSpec);
3615 
3616 	src << "void main (void)\n{\n"
3617 		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3618 		<< "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3619 
3620 	generateExecBufferIo(src, shaderSpec, "invocationId");
3621 
3622 	src	<< "}\n";
3623 
3624 	return src.str();
3625 }
3626 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3627 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3628 {
3629 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3630 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3631 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3632 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3633 }
3634 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3635 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3636 {
3637 	const int	patchSize		= 2;
3638 	const int	alignedValues	= deAlign32(numValues, patchSize);
3639 
3640 	// Initialize buffers with aligned value count to make room for padding
3641 	initBuffers(alignedValues);
3642 
3643 	// Setup input buffer & copy data
3644 	uploadInputBuffer(inputs, numValues, false);
3645 
3646 	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3647 
3648 	// Read back data
3649 	readOutputBuffer(outputs, numValues);
3650 }
3651 
3652 } // anonymous
3653 
3654 // ShaderExecutor
3655 
// Out-of-line destructor; body intentionally empty.
ShaderExecutor::~ShaderExecutor (void)
{
}
3659 
areInputs16Bit(void) const3660 bool ShaderExecutor::areInputs16Bit (void) const
3661 {
3662 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3663 	{
3664 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3665 			return true;
3666 	}
3667 	return false;
3668 }
3669 
areOutputs16Bit(void) const3670 bool ShaderExecutor::areOutputs16Bit (void) const
3671 {
3672 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3673 	{
3674 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3675 			return true;
3676 	}
3677 	return false;
3678 }
3679 
isOutput16Bit(const size_t ndx) const3680 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3681 {
3682 	if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3683 		return true;
3684 	return false;
3685 }
3686 
areInputs64Bit(void) const3687 bool ShaderExecutor::areInputs64Bit (void) const
3688 {
3689 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3690 	{
3691 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3692 			return true;
3693 	}
3694 	return false;
3695 }
3696 
areOutputs64Bit(void) const3697 bool ShaderExecutor::areOutputs64Bit (void) const
3698 {
3699 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3700 	{
3701 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3702 			return true;
3703 	}
3704 	return false;
3705 }
3706 
isOutput64Bit(const size_t ndx) const3707 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3708 {
3709 	if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3710 		return true;
3711 	return false;
3712 }
3713 
3714 // Utilities
3715 
// Adds the shader programs needed to run 'shaderSpec' in the given stage to
// 'dst' by dispatching to the matching executor class. Throws InternalError
// for shader types without an executor (mesh/task are compiled out on
// Vulkan SC builds).
void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					VertexShaderExecutor::generateSources	(shaderSpec, dst);						break;
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		TessControlExecutor::generateSources	(shaderSpec, dst);						break;
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	TessEvaluationExecutor::generateSources	(shaderSpec, dst);						break;
		case glu::SHADERTYPE_GEOMETRY:					GeometryShaderExecutor::generateSources	(shaderSpec, dst);						break;
		case glu::SHADERTYPE_FRAGMENT:					FragmentShaderExecutor::generateSources	(shaderSpec, dst);						break;
		case glu::SHADERTYPE_COMPUTE:					ComputeShaderExecutor::generateSources	(shaderSpec, dst);						break;
#ifndef CTS_USES_VULKANSC
		case glu::SHADERTYPE_MESH:						MeshTaskShaderExecutor::generateSources	(shaderSpec, dst, false/*useTask*/);	break;
		case glu::SHADERTYPE_TASK:						MeshTaskShaderExecutor::generateSources	(shaderSpec, dst, true/*useTask*/);		break;
#endif // CTS_USES_VULKANSC
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3734 
// Creates the executor implementation for the given shader stage. The caller
// takes ownership of the returned heap-allocated object. Throws InternalError
// for shader types without an executor (mesh/task are compiled out on
// Vulkan SC builds).
ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(context, shaderSpec, extraResourcesLayout);
#ifndef CTS_USES_VULKANSC
		case glu::SHADERTYPE_MESH:						return new MeshTaskShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TASK:						return new MeshTaskShaderExecutor	(context, shaderSpec, extraResourcesLayout);
#endif // CTS_USES_VULKANSC
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3753 
// Returns true if an executor implementation exists for the given shader type.
// NOTE(review): mesh/task report true even on CTS_USES_VULKANSC builds where
// generateSources()/createExecutor() compile those cases out and would throw
// InternalError -- confirm this asymmetry is intended.
bool executorSupported(glu::ShaderType shaderType)
{
	switch (shaderType)
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
	case glu::SHADERTYPE_FRAGMENT:
	case glu::SHADERTYPE_COMPUTE:
	case glu::SHADERTYPE_MESH:
	case glu::SHADERTYPE_TASK:
		return true;
	default:
		return false;
	}
}
3771 
// Throws NotSupportedError unless the device supports executing the given
// shader stage with the stores/atomics the executors rely on. On Vulkan SC
// builds (CTS_USES_VULKANSC) the whole check is compiled out and always
// passes.
void checkSupportShader(Context& context, const glu::ShaderType shaderType)
{
#ifndef CTS_USES_VULKANSC
	// Stage support.
	switch (shaderType)
	{
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
		break;

	case glu::SHADERTYPE_GEOMETRY:
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
		break;

	case glu::SHADERTYPE_TASK:
	case glu::SHADERTYPE_MESH:
		{
			// Mesh-pipeline stages need the extension; task additionally
			// needs the taskShader feature bit.
			context.requireDeviceFunctionality("VK_EXT_mesh_shader");

			if (shaderType == glu::SHADERTYPE_TASK)
			{
				const auto& features = context.getMeshShaderFeaturesEXT();
				if (!features.taskShader)
					TCU_THROW(NotSupportedError, "taskShader not supported");
			}
		}
		break;

	default:
		break;
	}

	// Stores and atomic operation support.
	switch (shaderType)
	{
	case glu::SHADERTYPE_VERTEX:
	case glu::SHADERTYPE_TESSELLATION_CONTROL:
	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
	case glu::SHADERTYPE_GEOMETRY:
	case glu::SHADERTYPE_TASK:
	case glu::SHADERTYPE_MESH:
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
		break;
	case glu::SHADERTYPE_FRAGMENT:
		context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
		break;
	case glu::SHADERTYPE_COMPUTE:
		// Compute always supports buffer stores; nothing extra to require.
		break;
	default:
		DE_FATAL("Unsupported shader type");
		break;
	}

	// The tess eval executor draws isolines, which portability-subset
	// implementations may not support.
	if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
		!context.getPortabilitySubsetFeatures().tessellationIsolines)
	{
		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
	}
#else
	DE_UNREF(context);
	DE_UNREF(shaderType);
#endif // CTS_USES_VULKANSC
}
3837 
3838 
3839 } // shaderexecutor
3840 } // vkt
3841