• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 #include "gluShaderUtil.hpp"
39 
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43 
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47 #include "deFloat16.h"
48 
49 #include <map>
50 #include <sstream>
51 #include <iostream>
52 
53 using std::vector;
54 using namespace vk;
55 
56 namespace vkt
57 {
58 namespace shaderexecutor
59 {
60 namespace
61 {
62 
// Default framebuffer dimensions used for rendering executor results.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};

// Common typedefs

// Ref-counted wrappers so Vulkan object handles can live in std:: containers.
typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::SharedPtr<Allocation>			AllocationSp;

// Maps a glu basic type to the VkFormat used for its vertex attribute (defined below).
static VkFormat getAttributeFormat(const glu::DataType dataType);
77 
78 // Shader utilities
79 
getDefaultClearColor(void)80 static VkClearValue	getDefaultClearColor (void)
81 {
82 	return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
83 }
84 
// Builds a minimal GLSL 4.50 fragment shader that writes vec4(0.0) to the
// single color output at location 0. Used when the fragment stage is only a
// placeholder and results come from another stage.
static std::string generateEmptyFragmentSource (void)
{
	std::ostringstream shader;

	shader << "#version 450\n"
	       << "layout(location=0) out highp vec4 o_color;\n"
	       << "void main (void)\n{\n"
	       << "\to_color = vec4(0.0);\n"
	       << "}\n";

	return shader.str();
}
98 
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)99 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
100 {
101 	for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
102 	{
103 		if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
104 		{
105 			if(glu::isDataTypeVector(symIter->varType.getBasicType()))
106 			{
107 				for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
108 				{
109 					src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
110 				}
111 			}
112 			else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
113 			{
114 				int maxRow = 0;
115 				int maxCol = 0;
116 				switch (symIter->varType.getBasicType())
117 				{
118 				case glu::TYPE_FLOAT_MAT2:
119 					maxRow = maxCol = 2;
120 					break;
121 				case glu::TYPE_FLOAT_MAT2X3:
122 					maxRow = 2;
123 					maxCol = 3;
124 					break;
125 				case glu::TYPE_FLOAT_MAT2X4:
126 					maxRow = 2;
127 					maxCol = 4;
128 					break;
129 				case glu::TYPE_FLOAT_MAT3X2:
130 					maxRow = 3;
131 					maxCol = 2;
132 					break;
133 				case glu::TYPE_FLOAT_MAT3:
134 					maxRow = maxCol = 3;
135 					break;
136 				case glu::TYPE_FLOAT_MAT3X4:
137 					maxRow = 3;
138 					maxCol = 4;
139 					break;
140 				case glu::TYPE_FLOAT_MAT4X2:
141 					maxRow = 4;
142 					maxCol = 2;
143 					break;
144 				case glu::TYPE_FLOAT_MAT4X3:
145 					maxRow = 4;
146 					maxCol = 3;
147 					break;
148 				case glu::TYPE_FLOAT_MAT4:
149 					maxRow = maxCol = 4;
150 					break;
151 				default:
152 					DE_ASSERT(false);
153 					break;
154 				}
155 
156 				for(int i = 0; i < maxRow; i++)
157 				for(int j = 0; j < maxCol; j++)
158 				{
159 					src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
160 				}
161 			}
162 			else
163 			{
164 					src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
165 			}
166 		}
167 	}
168 }
169 
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)170 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
171 {
172 	std::ostringstream	src;
173 	int					location	= 0;
174 
175 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
176 
177 	if (!shaderSpec.globalDeclarations.empty())
178 		src << shaderSpec.globalDeclarations << "\n";
179 
180 	src << "layout(location = " << location << ") in highp vec4 a_position;\n";
181 
182 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
183 	{
184 		location++;
185 		src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
186 			<< "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
187 	}
188 
189 	src << "\nvoid main (void)\n{\n"
190 		<< "	gl_Position = a_position;\n"
191 		<< "	gl_PointSize = 1.0;\n";
192 
193 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
194 		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
195 
196 	src << "}\n";
197 
198 	return src.str();
199 }
200 
// Generates the vertex shader variant that executes the shader spec itself:
// inputs are fetched from vertex attributes (locations 1..N; 0 is a_position),
// the spec source runs on local copies, and results are written to flat
// outputs at locations 0..M-1. Bool/bvec outputs are converted to int/ivec
// since booleans cannot be passed between stages. When packFloat16Bit is set,
// float inputs/outputs are processed at float16 precision and repacked via
// packFloat16Bit() before being written out.
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(location = 0) in highp vec4 a_position;\n";

	// Attribute declarations for spec inputs (location 0 is the position).
	int			locationNumber	= 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src <<  "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	// Flat output declarations; bool/bvec become int/ivec of the same width.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = a_position;\n"
		<< "	gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// Work on a float16 copy of the input.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// float16 result variable plus a full-precision "packed_" shadow
			// that receives the packFloat2x16 bit pattern.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
306 
// Mapping of fragment shader output symbols to framebuffer attachment locations.
struct FragmentOutputLayout
{
	std::vector<const Symbol*>		locationSymbols;		//!< Symbol occupying each location (one entry per location)
	std::map<std::string, int>		locationMap;			//!< Map from symbol name to its first location
};
312 
// Declares the fragment shader outputs (outputPrefix + symbol name) at the
// locations given by outLocationMap, adjusting the declared type so that the
// value is renderable:
//  - float/vec outputs become highp uint/uvec when useIntOutputs is set
//    (values are bit-cast by generateFragShaderOutAssign),
//  - bool/bvec outputs are declared as highp int/ivec,
//  - matrix outputs are flattened into one vector per column, named
//    "<name>_<col>", at consecutive locations.
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol&				output		= shaderSpec.outputs[outNdx];
		const int					location	= de::lookup(outLocationMap, output.name);
		const std::string			outVarName	= outputPrefix + output.name;
		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Declare as uint so float bit patterns can be written exactly.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Booleans are not renderable; use int with the same component count.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One output vector per matrix column, at consecutive locations.
			// NOTE(review): columns are declared as uvec regardless of
			// useIntOutputs — presumably matrix outputs are only ever read
			// back via bit-cast; confirm against callers.
			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name				= outVarName + "_" + de::toString(vecNdx);
				decl.layout.location	= location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
361 
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)362 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
363 {
364 	if (isInput16Bit)
365 		packFloat16Bit(src, shaderSpec.outputs);
366 
367 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
368 	{
369 		const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
370 
371 		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372 			src << "	o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
373 		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374 		{
375 			const int	numVecs		= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376 
377 			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378 				if (useIntOutputs)
379 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
380 				else
381 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
382 		}
383 		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384 		{
385 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
386 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387 
388 			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
389 		}
390 		else
391 			src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
392 	}
393 }
394 
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)395 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
396 {
397 	std::ostringstream	src;
398 
399 	src <<"#version 450\n";
400 
401 	if (!shaderSpec.globalDeclarations.empty())
402 		src << shaderSpec.globalDeclarations << "\n";
403 
404 	int locationNumber = 0;
405 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
406 	{
407 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
408 		{
409 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
410 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
411 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
412 
413 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
414 		}
415 		else
416 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
417 	}
418 
419 	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
420 
421 	src << "\nvoid main (void)\n{\n";
422 
423 	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
424 
425 	src << "}\n";
426 
427 	return src.str();
428 }
429 
// Generates the geometry shader variant: receives one point with flat inputs
// from the vertex stage, runs the shader spec source on local copies, and
// emits a single point carrying the results as flat outputs (bool/bvec
// converted to int/ivec). gl_PointSize is forwarded only when
// pointSizeSupported is set.
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLSL ES 3.1 requires the geometry shader extension to be enabled.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Inputs are arrays in a geometry shader (one element per input vertex).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	// Flat output declarations; bool/bvec become int/ivec of the same width.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << "	EmitVertex();\n"
		<< "	EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
513 
// Generates the fragment shader variant that executes the shader spec itself:
// inputs arrive as flat varyings, the spec source runs on local copies, and
// results are written to the outputs declared by generateFragShaderOutputDecl().
// With packFloat16Bit, float values are processed at float16 precision and
// repacked before assignment (see generateFragShaderOutAssign).
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	// Flat input declarations, one location per spec input.
	int			locationNumber	= 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// Work on a float16 copy of the input.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// float16 result variable plus a full-precision "packed_" shadow
			// that receives the packFloat2x16 bit pattern.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
572 
573 // FragmentOutExecutor
574 
// Executor that renders one point per input value and reads the results back
// from fragment shader outputs. Shared base for the stage-specific executors;
// this class owns the vertex-input setup and result layout.
class FragmentOutExecutor : public ShaderExecutor
{
public:
														FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual												~FragmentOutExecutor	(void);

	// Executes the shader for numValues values: 'inputs' holds one flat data
	// array per spec input, results are written to 'outputs' (one per spec
	// output).
	virtual void										execute					(int					numValues,
																				 const void* const*		inputs,
																				 void* const*			outputs,
																				 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType								m_shaderType;		// Stage that runs the shader spec source
	const FragmentOutputLayout							m_outputLayout;		// Output symbol -> attachment location mapping

private:
	void												bindAttributes			(int					numValues,
																				 const void* const*		inputs);

	// Creates a vertex buffer for 'dataPtr' and records the matching
	// binding/attribute descriptions.
	void												addAttribute			(deUint32				bindingLocation,
																				 VkFormat				format,
																				 deUint32				sizePerElement,
																				 deUint32				count,
																				 const void*			dataPtr);
	// reinit render data members
	virtual void										clearRenderData			(void);

	const VkDescriptorSetLayout							m_extraResourcesLayout;

	std::vector<VkVertexInputBindingDescription>		m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>		m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>								m_vertexBuffers;
	std::vector<AllocationSp>							m_vertexBufferAllocs;
};
609 
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)610 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
611 {
612 	FragmentOutputLayout	ret;
613 	int						location	= 0;
614 
615 	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
616 	{
617 		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
618 
619 		TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
620 		de::insert(ret.locationMap, it->name, location);
621 		location += numLocations;
622 
623 		for (int ndx = 0; ndx < numLocations; ++ndx)
624 			ret.locationSymbols.push_back(&*it);
625 	}
626 
627 	return ret;
628 }
629 
// Computes the output location layout up front and verifies that every input's
// attribute format can be used as a vertex buffer format on this device;
// throws NotSupportedError otherwise.
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice = m_context.getPhysicalDevice();
	const InstanceInterface&	vki = m_context.getInstanceInterface();

	// Input attributes
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol = m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType = symbol.varType.getBasicType();
		const VkFormat				format = getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
650 
// Trivial destructor; all Vulkan objects are released by RAII members.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
654 
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)655 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
656 {
657 	std::vector<tcu::Vec2> positions(numValues);
658 	for (int valNdx = 0; valNdx < numValues; valNdx++)
659 	{
660 		const int		ix		= valNdx % renderSize.x();
661 		const int		iy		= valNdx / renderSize.x();
662 		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
663 		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
664 
665 		positions[valNdx] = tcu::Vec2(fx, fy);
666 	}
667 
668 	return positions;
669 }
670 
// Selects the tcu::TextureFormat used to read back a given output type.
// Bools map to signed int32; float/float16 map to uint32 when useIntOutputs
// is set (bit-exact readback), otherwise to float/half-float.
static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
{
	// Indexed by (component count - 1); 3-component outputs use an RGBA order.
	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
	{
		tcu::TextureFormat::R,
		tcu::TextureFormat::RG,
		tcu::TextureFormat::RGBA,	// No RGB variants available.
		tcu::TextureFormat::RGBA
	};

	const glu::DataType					basicType		= outputType.getBasicType();
	const int							numComps		= glu::getDataTypeNumComponents(basicType);
	tcu::TextureFormat::ChannelType		channelType;

	switch (glu::getDataTypeScalarType(basicType))
	{
		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;														break;
		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;															break;
		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;															break;
		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;			break;
		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;		break;
		default:
			throw tcu::InternalError("Invalid output type");
	}

	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));

	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
}
700 
// Maps a glu basic type to the VkFormat of its vertex attribute data.
// Matrix types return the format of a single column vector; each column is
// bound as a separate attribute.
static VkFormat getAttributeFormat (const glu::DataType dataType)
{
	switch (dataType)
	{
		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;

		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;

		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;

		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;

		// MATnxm has m rows per column, so a column is an m-component vector.
		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
		default:
			DE_ASSERT(false);
			return VK_FORMAT_UNDEFINED;
	}
}
739 
addAttribute(deUint32 bindingLocation,VkFormat format,deUint32 sizePerElement,deUint32 count,const void * dataPtr)740 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
741 {
742 	// Portability requires stride to be multiply of minVertexInputBindingStrideAlignment
743 	// this value is usually 4 and current tests meet this requirement but
744 	// if this changes in future then this limit should be verified in checkSupport
745 	if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
746 		((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
747 	{
748 		DE_FATAL("stride is not multiply of minVertexInputBindingStrideAlignment");
749 	}
750 
751 	// Add binding specification
752 	const deUint32							binding = (deUint32)m_vertexBindingDescriptions.size();
753 	const VkVertexInputBindingDescription	bindingDescription =
754 	{
755 		binding,
756 		sizePerElement,
757 		VK_VERTEX_INPUT_RATE_VERTEX
758 	};
759 
760 	m_vertexBindingDescriptions.push_back(bindingDescription);
761 
762 	// Add location and format specification
763 	const VkVertexInputAttributeDescription attributeDescription =
764 	{
765 		bindingLocation,			// deUint32	location;
766 		binding,					// deUint32	binding;
767 		format,						// VkFormat	format;
768 		0u,							// deUint32	offsetInBytes;
769 	};
770 
771 	m_vertexAttributeDescriptions.push_back(attributeDescription);
772 
773 	// Upload data to buffer
774 	const VkDevice				vkDevice			= m_context.getDevice();
775 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
776 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
777 
778 	const VkDeviceSize			inputSize			= sizePerElement * count;
779 	const VkBufferCreateInfo	vertexBufferParams	=
780 	{
781 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
782 		DE_NULL,									// const void*			pNext;
783 		0u,											// VkBufferCreateFlags	flags;
784 		inputSize,									// VkDeviceSize			size;
785 		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
786 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
787 		1u,											// deUint32				queueFamilyCount;
788 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
789 	};
790 
791 	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
792 	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
793 
794 	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
795 
796 	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
797 	flushAlloc(vk, vkDevice, *alloc);
798 
799 	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
800 	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
801 }
802 
bindAttributes(int numValues,const void * const * inputs)803 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
804 {
805 	// Input attributes
806 	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
807 	{
808 		const Symbol&		symbol			= m_shaderSpec.inputs[inputNdx];
809 		const void*			ptr				= inputs[inputNdx];
810 		const glu::DataType	basicType		= symbol.varType.getBasicType();
811 		const int			vecSize			= glu::getDataTypeScalarSize(basicType);
812 		const VkFormat		format			= getAttributeFormat(basicType);
813 		int					elementSize		= 0;
814 		int					numAttrsToAdd	= 1;
815 
816 		if (glu::isDataTypeDoubleOrDVec(basicType))
817 			elementSize = sizeof(double);
818 		if (glu::isDataTypeFloatOrVec(basicType))
819 			elementSize = sizeof(float);
820 		else if (glu::isDataTypeFloat16OrVec(basicType))
821 			elementSize = sizeof(deUint16);
822 		else if (glu::isDataTypeIntOrIVec(basicType))
823 			elementSize = sizeof(int);
824 		else if (glu::isDataTypeUintOrUVec(basicType))
825 			elementSize = sizeof(deUint32);
826 		else if (glu::isDataTypeMatrix(basicType))
827 		{
828 			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
829 			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
830 
831 			elementSize = numRows * numCols * (int)sizeof(float);
832 			numAttrsToAdd = numCols;
833 		}
834 		else
835 			DE_ASSERT(false);
836 
837 		// add attributes, in case of matrix every column is binded as an attribute
838 		for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
839 		{
840 			addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
841 		}
842 	}
843 }
844 
clearRenderData(void)845 void FragmentOutExecutor::clearRenderData (void)
846 {
847 	m_vertexBindingDescriptions.clear();
848 	m_vertexAttributeDescriptions.clear();
849 	m_vertexBuffers.clear();
850 	m_vertexBufferAllocs.clear();
851 }
852 
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)853 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
854 {
855 	const VkDescriptorSetLayoutCreateInfo	createInfo	=
856 	{
857 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
858 		DE_NULL,
859 		(VkDescriptorSetLayoutCreateFlags)0,
860 		0u,
861 		DE_NULL,
862 	};
863 	return createDescriptorSetLayout(vkd, device, &createInfo);
864 }
865 
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)866 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
867 {
868 	const VkDescriptorPoolSize			dummySize	=
869 	{
870 		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
871 		1u,
872 	};
873 	const VkDescriptorPoolCreateInfo	createInfo	=
874 	{
875 		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
876 		DE_NULL,
877 		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
878 		1u,
879 		1u,
880 		&dummySize
881 	};
882 	return createDescriptorPool(vkd, device, &createInfo);
883 }
884 
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)885 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
886 {
887 	const VkDescriptorSetAllocateInfo	allocInfo	=
888 	{
889 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
890 		DE_NULL,
891 		pool,
892 		1u,
893 		&layout,
894 	};
895 	return allocateDescriptorSet(vkd, device, &allocInfo);
896 }
897 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)898 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
899 {
900 	const VkDevice										vkDevice				= m_context.getDevice();
901 	const DeviceInterface&								vk						= m_context.getDeviceInterface();
902 	const VkQueue										queue					= m_context.getUniversalQueue();
903 	const deUint32										queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();
904 	Allocator&											memAlloc				= m_context.getDefaultAllocator();
905 
906 	const deUint32										renderSizeX				= de::min(static_cast<deUint32>(128), (deUint32)numValues);
907 	const deUint32										renderSizeY				= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
908 	const tcu::UVec2									renderSize				(renderSizeX, renderSizeY);
909 	std::vector<tcu::Vec2>								positions;
910 
911 	const bool											useGeometryShader		= m_shaderType == glu::SHADERTYPE_GEOMETRY;
912 
913 	std::vector<VkImageSp>								colorImages;
914 	std::vector<VkImageMemoryBarrier>					colorImagePreRenderBarriers;
915 	std::vector<VkImageMemoryBarrier>					colorImagePostRenderBarriers;
916 	std::vector<AllocationSp>							colorImageAllocs;
917 	std::vector<VkAttachmentDescription>				attachments;
918 	std::vector<VkClearValue>							attachmentClearValues;
919 	std::vector<VkImageViewSp>							colorImageViews;
920 
921 	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
922 	std::vector<VkAttachmentReference>					colorAttachmentReferences;
923 
924 	Move<VkRenderPass>									renderPass;
925 	Move<VkFramebuffer>									framebuffer;
926 	Move<VkPipelineLayout>								pipelineLayout;
927 	Move<VkPipeline>									graphicsPipeline;
928 
929 	Move<VkShaderModule>								vertexShaderModule;
930 	Move<VkShaderModule>								geometryShaderModule;
931 	Move<VkShaderModule>								fragmentShaderModule;
932 
933 	Move<VkCommandPool>									cmdPool;
934 	Move<VkCommandBuffer>								cmdBuffer;
935 
936 	Unique<VkDescriptorSetLayout>						emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
937 	Unique<VkDescriptorPool>							dummyDescriptorPool			(createDummyDescriptorPool(vk, vkDevice));
938 	Unique<VkDescriptorSet>								emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
939 
940 	clearRenderData();
941 
942 	// Compute positions - 1px points are used to drive fragment shading.
943 	positions = computeVertexPositions(numValues, renderSize.cast<int>());
944 
945 	// Bind attributes
946 	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
947 	bindAttributes(numValues, inputs);
948 
949 	// Create color images
950 	{
951 		const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
952 		{
953 			VK_FALSE,																	// VkBool32						blendEnable;
954 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcColorBlendFactor;
955 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				dstColorBlendFactor;
956 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpColor;
957 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcAlphaBlendFactor;
958 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				destAlphaBlendFactor;
959 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpAlpha;
960 			(VK_COLOR_COMPONENT_R_BIT |
961 			 VK_COLOR_COMPONENT_G_BIT |
962 			 VK_COLOR_COMPONENT_B_BIT |
963 			 VK_COLOR_COMPONENT_A_BIT)													// VkColorComponentFlags		colorWriteMask;
964 		};
965 
966 		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
967 		{
968 			const bool		isDouble	= glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
969 			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
970 			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
971 			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
972 			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
973 			const VkFormat	colorFormat = (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));
974 
975 			{
976 				const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
977 				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
978 					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
979 			}
980 
981 			const VkImageCreateInfo	 colorImageParams =
982 			{
983 				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType				sType;
984 				DE_NULL,																	// const void*					pNext;
985 				0u,																			// VkImageCreateFlags			flags;
986 				VK_IMAGE_TYPE_2D,															// VkImageType					imageType;
987 				colorFormat,																// VkFormat						format;
988 				{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D					extent;
989 				1u,																			// deUint32						mipLevels;
990 				1u,																			// deUint32						arraySize;
991 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
992 				VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling				tiling;
993 				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags			usage;
994 				VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode				sharingMode;
995 				1u,																			// deUint32						queueFamilyCount;
996 				&queueFamilyIndex,															// const deUint32*				pQueueFamilyIndices;
997 				VK_IMAGE_LAYOUT_UNDEFINED,													// VkImageLayout				initialLayout;
998 			};
999 
1000 			const VkAttachmentDescription colorAttachmentDescription =
1001 			{
1002 				0u,																			// VkAttachmentDescriptorFlags	flags;
1003 				colorFormat,																// VkFormat						format;
1004 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
1005 				VK_ATTACHMENT_LOAD_OP_CLEAR,												// VkAttachmentLoadOp			loadOp;
1006 				VK_ATTACHMENT_STORE_OP_STORE,												// VkAttachmentStoreOp			storeOp;
1007 				VK_ATTACHMENT_LOAD_OP_DONT_CARE,											// VkAttachmentLoadOp			stencilLoadOp;
1008 				VK_ATTACHMENT_STORE_OP_DONT_CARE,											// VkAttachmentStoreOp			stencilStoreOp;
1009 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				initialLayout;
1010 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				finalLayout;
1011 			};
1012 
1013 			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1014 			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1015 			attachmentClearValues.push_back(getDefaultClearColor());
1016 
1017 			// Allocate and bind color image memory
1018 			{
1019 				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1020 				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1021 				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1022 
1023 				attachments.push_back(colorAttachmentDescription);
1024 				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1025 
1026 				const VkAttachmentReference colorAttachmentReference =
1027 				{
1028 					(deUint32) (colorImages.size() - 1),			//	deUint32		attachment;
1029 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL		//	VkImageLayout	layout;
1030 				};
1031 
1032 				colorAttachmentReferences.push_back(colorAttachmentReference);
1033 			}
1034 
1035 			// Create color attachment view
1036 			{
1037 				const VkImageViewCreateInfo colorImageViewParams =
1038 				{
1039 					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
1040 					DE_NULL,											// const void*				pNext;
1041 					0u,													// VkImageViewCreateFlags	flags;
1042 					colorImages.back().get()->get(),					// VkImage					image;
1043 					VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
1044 					colorFormat,										// VkFormat					format;
1045 					{
1046 						VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
1047 						VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
1048 						VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
1049 						VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
1050 					},													// VkComponentMapping		components;
1051 					{
1052 						VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
1053 						0u,												// deUint32					baseMipLevel;
1054 						1u,												// deUint32					mipLevels;
1055 						0u,												// deUint32					baseArraySlice;
1056 						1u												// deUint32					arraySize;
1057 					}													// VkImageSubresourceRange	subresourceRange;
1058 				};
1059 
1060 				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1061 				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1062 
1063 				const VkImageMemoryBarrier	colorImagePreRenderBarrier =
1064 				{
1065 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1066 					DE_NULL,												// pNext
1067 					0u,														// srcAccessMask
1068 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1069 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// dstAccessMask
1070 					VK_IMAGE_LAYOUT_UNDEFINED,								// oldLayout
1071 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// newLayout
1072 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1073 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1074 					colorImages.back().get()->get(),						// image
1075 					{
1076 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1077 						0u,														// baseMipLevel
1078 						1u,														// levelCount
1079 						0u,														// baseArrayLayer
1080 						1u,														// layerCount
1081 					}														// subresourceRange
1082 				};
1083 				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1084 
1085 				const VkImageMemoryBarrier	colorImagePostRenderBarrier =
1086 				{
1087 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1088 					DE_NULL,												// pNext
1089 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1090 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// srcAccessMask
1091 					VK_ACCESS_TRANSFER_READ_BIT,							// dstAccessMask
1092 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// oldLayout
1093 					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,					// newLayout
1094 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1095 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1096 					colorImages.back().get()->get(),						// image
1097 					{
1098 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1099 						0u,														// baseMipLevel
1100 						1u,														// levelCount
1101 						0u,														// baseArrayLayer
1102 						1u,														// layerCount
1103 					}														// subresourceRange
1104 				};
1105 				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1106 			}
1107 		}
1108 	}
1109 
1110 	// Create render pass
1111 	{
1112 		const VkSubpassDescription subpassDescription =
1113 		{
1114 			0u,													// VkSubpassDescriptionFlags	flags;
1115 			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
1116 			0u,													// deUint32						inputCount;
1117 			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
1118 			(deUint32)colorImages.size(),						// deUint32						colorCount;
1119 			&colorAttachmentReferences[0],						// const VkAttachmentReference*	colorAttachments;
1120 			DE_NULL,											// const VkAttachmentReference*	resolveAttachments;
1121 			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
1122 			0u,													// deUint32						preserveCount;
1123 			DE_NULL												// const VkAttachmentReference*	pPreserveAttachments;
1124 		};
1125 
1126 		const VkRenderPassCreateInfo renderPassParams =
1127 		{
1128 			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
1129 			DE_NULL,											// const void*						pNext;
1130 			(VkRenderPassCreateFlags)0,							// VkRenderPassCreateFlags			flags;
1131 			(deUint32)attachments.size(),						// deUint32							attachmentCount;
1132 			&attachments[0],									// const VkAttachmentDescription*	pAttachments;
1133 			1u,													// deUint32							subpassCount;
1134 			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
1135 			0u,													// deUint32							dependencyCount;
1136 			DE_NULL												// const VkSubpassDependency*		pDependencies;
1137 		};
1138 
1139 		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1140 	}
1141 
1142 	// Create framebuffer
1143 	{
1144 		std::vector<VkImageView> views(colorImageViews.size());
1145 		for (size_t i = 0; i < colorImageViews.size(); i++)
1146 		{
1147 			views[i] = colorImageViews[i].get()->get();
1148 		}
1149 
1150 		const VkFramebufferCreateInfo framebufferParams =
1151 		{
1152 			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
1153 			DE_NULL,											// const void*					pNext;
1154 			0u,													// VkFramebufferCreateFlags		flags;
1155 			*renderPass,										// VkRenderPass					renderPass;
1156 			(deUint32)views.size(),								// deUint32						attachmentCount;
1157 			&views[0],											// const VkImageView*			pAttachments;
1158 			(deUint32)renderSize.x(),							// deUint32						width;
1159 			(deUint32)renderSize.y(),							// deUint32						height;
1160 			1u													// deUint32						layers;
1161 		};
1162 
1163 		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1164 	}
1165 
1166 	// Create pipeline layout
1167 	{
1168 		const VkDescriptorSetLayout			setLayouts[]			=
1169 		{
1170 			*emptyDescriptorSetLayout,
1171 			m_extraResourcesLayout
1172 		};
1173 		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
1174 		{
1175 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
1176 			DE_NULL,											// const void*					pNext;
1177 			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
1178 			(m_extraResourcesLayout != 0 ? 2u : 0u),			// deUint32						descriptorSetCount;
1179 			setLayouts,											// const VkDescriptorSetLayout*	pSetLayouts;
1180 			0u,													// deUint32						pushConstantRangeCount;
1181 			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
1182 		};
1183 
1184 		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1185 	}
1186 
1187 	// Create shaders
1188 	{
1189 		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1190 		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1191 
1192 		if (useGeometryShader)
1193 		{
1194 			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1195 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1196 			else
1197 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1198 		}
1199 	}
1200 
1201 	// Create pipeline
1202 	{
1203 		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1204 		{
1205 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
1206 			DE_NULL,													// const void*									pNext;
1207 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags		flags;
1208 			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32										bindingCount;
1209 			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
1210 			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32										attributeCount;
1211 			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*		pvertexAttributeDescriptions;
1212 		};
1213 
1214 		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
1215 		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));
1216 
1217 		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1218 		{
1219 			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
1220 			DE_NULL,														// const void*									pNext;
1221 			(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
1222 			VK_FALSE,														// VkBool32										logicOpEnable;
1223 			VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
1224 			(deUint32)colorBlendAttachmentStates.size(),					// deUint32										attachmentCount;
1225 			&colorBlendAttachmentStates[0],									// const VkPipelineColorBlendAttachmentState*	pAttachments;
1226 			{ 0.0f, 0.0f, 0.0f, 0.0f }										// float										blendConst[4];
1227 		};
1228 
1229 		graphicsPipeline = makeGraphicsPipeline(vk,														// const DeviceInterface&                        vk
1230 												vkDevice,												// const VkDevice                                device
1231 												*pipelineLayout,										// const VkPipelineLayout                        pipelineLayout
1232 												*vertexShaderModule,									// const VkShaderModule                          vertexShaderModule
1233 												DE_NULL,												// const VkShaderModule                          tessellationControlShaderModule
1234 												DE_NULL,												// const VkShaderModule                          tessellationEvalShaderModule
1235 												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule                          geometryShaderModule
1236 												*fragmentShaderModule,									// const VkShaderModule                          fragmentShaderModule
1237 												*renderPass,											// const VkRenderPass                            renderPass
1238 												viewports,												// const std::vector<VkViewport>&                viewports
1239 												scissors,												// const std::vector<VkRect2D>&                  scissors
1240 												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,						// const VkPrimitiveTopology                     topology
1241 												0u,														// const deUint32                                subpass
1242 												0u,														// const deUint32                                patchControlPoints
1243 												&vertexInputStateParams,								// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1244 												DE_NULL,												// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1245 												DE_NULL,												// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1246 												DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1247 												&colorBlendStateParams);								// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1248 	}
1249 
1250 	// Create command pool
1251 	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1252 
1253 	// Create command buffer
1254 	{
1255 		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1256 
1257 		beginCommandBuffer(vk, *cmdBuffer);
1258 
1259 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1260 							  0, (const VkMemoryBarrier*)DE_NULL,
1261 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1262 							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1263 		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1264 
1265 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1266 
1267 		if (m_extraResourcesLayout != 0)
1268 		{
1269 			DE_ASSERT(extraResources != 0);
1270 			const VkDescriptorSet	descriptorSets[]	= { *emptyDescriptorSet, extraResources };
1271 			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1272 		}
1273 		else
1274 			DE_ASSERT(extraResources == 0);
1275 
1276 		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1277 
1278 		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1279 
1280 		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1281 		for (size_t i = 0; i < numberOfVertexAttributes; i++)
1282 		{
1283 			buffers[i] = m_vertexBuffers[i].get()->get();
1284 		}
1285 
1286 		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1287 		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1288 
1289 		endRenderPass(vk, *cmdBuffer);
1290 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1291 							  0, (const VkMemoryBarrier*)DE_NULL,
1292 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1293 							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1294 
1295 		endCommandBuffer(vk, *cmdBuffer);
1296 	}
1297 
1298 	// Execute Draw
1299 	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1300 
1301 	// Read back result and output
1302 	{
1303 		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1304 		const VkBufferCreateInfo readImageBufferParams =
1305 		{
1306 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1307 			DE_NULL,									// const void*			pNext;
1308 			0u,											// VkBufferCreateFlags	flags;
1309 			imageSizeBytes,								// VkDeviceSize			size;
1310 			VK_BUFFER_USAGE_TRANSFER_DST_BIT,			// VkBufferUsageFlags	usage;
1311 			VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1312 			1u,											// deUint32				queueFamilyCount;
1313 			&queueFamilyIndex,							// const deUint32*		pQueueFamilyIndices;
1314 		};
1315 
1316 		// constants for image copy
1317 		Move<VkCommandPool>	copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1318 
1319 		const VkBufferImageCopy copyParams =
1320 		{
1321 			0u,											// VkDeviceSize			bufferOffset;
1322 			(deUint32)renderSize.x(),					// deUint32				bufferRowLength;
1323 			(deUint32)renderSize.y(),					// deUint32				bufferImageHeight;
1324 			{
1325 				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspect		aspect;
1326 				0u,										// deUint32				mipLevel;
1327 				0u,										// deUint32				arraySlice;
1328 				1u,										// deUint32				arraySize;
1329 			},											// VkImageSubresource	imageSubresource;
1330 			{ 0u, 0u, 0u },								// VkOffset3D			imageOffset;
1331 			{ renderSize.x(), renderSize.y(), 1u }		// VkExtent3D			imageExtent;
1332 		};
1333 
1334 		// Read back pixels.
1335 		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1336 		{
1337 			const Symbol&				output			= m_shaderSpec.outputs[outNdx];
1338 			const int					outSize			= output.varType.getScalarSize();
1339 			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
1340 			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
1341 			const int					outLocation		= de::lookup(m_outputLayout.locationMap, output.name);
1342 
1343 			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1344 			{
1345 				tcu::TextureLevel			tmpBuf;
1346 				const tcu::TextureFormat	format = getRenderbufferFormatForOutput(output.varType, false);
1347 				const tcu::TextureFormat	readFormat (tcu::TextureFormat::RGBA, format.type);
1348 				const Unique<VkBuffer>		readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1349 				const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1350 
1351 				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1352 
1353 				// Copy image to buffer
1354 				{
1355 
1356 					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1357 
1358 					beginCommandBuffer(vk, *copyCmdBuffer);
1359 					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1360 
1361 					// Insert a barrier so data written by the transfer is available to the host
1362 					{
1363 						const VkBufferMemoryBarrier barrier =
1364 						{
1365 							VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
1366 							DE_NULL,									// const void*        pNext;
1367 							VK_ACCESS_TRANSFER_WRITE_BIT,				// VkAccessFlags      srcAccessMask;
1368 							VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
1369 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
1370 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
1371 							*readImageBuffer,							// VkBuffer           buffer;
1372 							0,											// VkDeviceSize       offset;
1373 							VK_WHOLE_SIZE,								// VkDeviceSize       size;
1374 						};
1375 
1376 						vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1377 											0, (const VkMemoryBarrier*)DE_NULL,
1378 											1, &barrier,
1379 											0, (const VkImageMemoryBarrier*)DE_NULL);
1380 					}
1381 
1382 					endCommandBuffer(vk, *copyCmdBuffer);
1383 
1384 					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1385 				}
1386 
1387 				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1388 
1389 				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1390 
1391 				const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1392 				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1393 
1394 				tcu::copy(tmpBuf.getAccess(), resultAccess);
1395 
1396 				if (isOutput16Bit(static_cast<size_t>(outNdx)))
1397 				{
1398 					deUint16*	dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1399 					if (outSize == 4 && outNumLocs == 1)
1400 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1401 					else
1402 					{
1403 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1404 						{
1405 							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1406 							deUint16*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1407 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1408 						}
1409 					}
1410 				}
1411 				else
1412 				{
1413 					deUint32*	dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1414 					if (outSize == 4 && outNumLocs == 1)
1415 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1416 					else
1417 					{
1418 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1419 						{
1420 							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1421 							deUint32*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1422 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1423 						}
1424 					}
1425 				}
1426 			}
1427 		}
1428 	}
1429 }
1430 
1431 // VertexShaderExecutor
1432 
// Runs the shader-spec expressions in the vertex stage; results are routed to
// fragment outputs through a pass-through fragment shader (see FragmentOutExecutor).
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
								VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~VertexShaderExecutor	(void);

	// Adds the "vert" and "frag" GLSL sources for this spec to dst.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
};
1441 
// Delegates all work to FragmentOutExecutor, selecting the vertex stage.
VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}
1446 
// Nothing to release beyond what the base class and RAII members handle.
VertexShaderExecutor::~VertexShaderExecutor (void)
{
}
1450 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1451 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1452 {
1453 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1454 
1455 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1456 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1457 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1458 }
1459 
1460 // GeometryShaderExecutor
1461 
// Runs the shader-spec expressions in the geometry stage; requires the
// geometryShader device feature (checked in the constructor).
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
								GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~GeometryShaderExecutor	(void);

	// Adds "vert", "geom", "geom_point_size" and "frag" GLSL sources for this spec.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1471 
// Selects the geometry stage; throws NotSupportedError when the device does not
// expose the geometryShader feature.
GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
{
	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();

	if (!features.geometryShader)
		TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
}
1480 
// Nothing to release beyond what the base class and RAII members handle.
GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}
1484 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1485 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1486 {
1487 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1488 
1489 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1490 
1491 	programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1492 	programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1493 
1494 	/* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1495 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1496 
1497 }
1498 
1499 // FragmentShaderExecutor
1500 
// Runs the shader-spec expressions directly in the fragment stage.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
								FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~FragmentShaderExecutor (void);

	// Adds the "vert" (pass-through) and "frag" GLSL sources for this spec.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1510 
// Delegates all work to FragmentOutExecutor, selecting the fragment stage.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1515 
// Nothing to release beyond what the base class and RAII members handle.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
1519 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1520 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1521 {
1522 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1523 
1524 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1525 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1526 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1527 }
1528 
1529 // Shared utilities for compute and tess executors
1530 
getVecStd430ByteAlignment(glu::DataType type)1531 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1532 {
1533 	deUint32 baseSize;
1534 
1535 	switch (glu::getDataTypeScalarType(type))
1536 	{
1537 		case glu::TYPE_FLOAT16:	baseSize = 2u; break;
1538 		case glu::TYPE_DOUBLE:	baseSize = 8u; break;
1539 		default:				baseSize = 4u; break;
1540 	}
1541 
1542 	switch (glu::getDataTypeScalarSize(type))
1543 	{
1544 		case 1:		return baseSize;
1545 		case 2:		return baseSize * 2u;
1546 		case 3:		// fallthrough.
1547 		case 4:		return baseSize * 4u;
1548 		default:
1549 			DE_ASSERT(false);
1550 			return 0u;
1551 	}
1552 }
1553 
// Base class for executors that pass shader inputs/outputs through SSBOs
// (used by the compute and tessellation executors below).
class BufferIoExecutor : public ShaderExecutor
{
public:
							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
	virtual					~BufferIoExecutor	(void);

protected:
	// Descriptor bindings used by the generated shaders' input/output SSBOs.
	enum
	{
		INPUT_BUFFER_BINDING	= 0,
		OUTPUT_BUFFER_BINDING	= 1,
	};

	// Creates and binds input/output buffers sized for numValues invocations.
	void					initBuffers			(int numValues);
	VkBuffer				getInputBuffer		(void) const		{ return *m_inputBuffer;					}
	VkBuffer				getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
	deUint32				getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
	deUint32				getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}

	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	void					readOutputBuffer	(void* const* outputPtrs, int numValues);

	// GLSL source helpers: SSBO block declarations and per-invocation load/store code.
	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer>			m_inputBuffer;
	Move<VkBuffer>			m_outputBuffer;

private:
	// std430 placement of a single variable inside the buffer.
	struct VarLayout
	{
		deUint32		offset;			// byte offset of the variable within one struct instance
		deUint32		stride;			// byte stride between consecutive values (whole-struct size)
		deUint32		matrixStride;	// byte stride between matrix columns (0 for non-matrices)

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);

	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation>	m_inputAlloc;
	de::MovePtr<Allocation>	m_outputAlloc;

	vector<VarLayout>		m_inputLayout;
	vector<VarLayout>		m_outputLayout;
};
1605 
// Precomputes the std430 buffer layouts for the spec's inputs and outputs.
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1612 
// Buffers and allocations are released by their Move/MovePtr members.
BufferIoExecutor::~BufferIoExecutor (void)
{
}
1616 
getLayoutStride(const vector<VarLayout> & layout)1617 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1618 {
1619 	return layout.empty() ? 0 : layout[0].stride;
1620 }
1621 
// Computes an std430-compatible layout (offset, matrix stride, struct stride)
// for the given symbols. After the loop, every entry's stride is set to the
// total struct size aligned to the strictest member alignment.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
{
	deUint32	maxAlignment	= 0;
	deUint32	curOffset		= 0;

	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());

	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
	{
		const Symbol&		symbol		= symbols[varNdx];
		const glu::DataType	basicType	= symbol.varType.getBasicType();
		VarLayout&			layoutEntry	= (*layout)[varNdx];

		if (glu::isDataTypeScalarOrVector(basicType))
		{
			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
			// Size = component count * per-scalar byte size (8 for doubles, 2 for 16-bit floats, 4 otherwise).
			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment	= de::max(maxAlignment, alignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= 0;

			curOffset += size;
		}
		else if (glu::isDataTypeMatrix(basicType))
		{
			// Matrices are laid out column-major: one column vector per step,
			// each aligned like a stand-alone vector of the same shape.
			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType		vecType			= glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment	= de::max(maxAlignment, vecAlignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= vecAlignment;

			curOffset += vecAlignment*numVecs;
		}
		else
			DE_ASSERT(false); // Arrays/structs are not supported by this executor.
	}

	{
		// Stride between consecutive values: total size rounded up to the largest member alignment.
		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);

		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
			varIter->stride = totalSize;
	}
}
1675 
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1676 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1677 {
1678 	// Input struct
1679 	if (!spec.inputs.empty())
1680 	{
1681 		glu::StructType inputStruct("Inputs");
1682 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1683 			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1684 		src << glu::declare(&inputStruct) << ";\n";
1685 	}
1686 
1687 	// Output struct
1688 	{
1689 		glu::StructType outputStruct("Outputs");
1690 		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1691 			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1692 		src << glu::declare(&outputStruct) << ";\n";
1693 	}
1694 
1695 	src << "\n";
1696 
1697 	if (!spec.inputs.empty())
1698 	{
1699 		src	<< "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1700 			<< "{\n"
1701 			<< "	Inputs inputs[];\n"
1702 			<< "};\n";
1703 	}
1704 
1705 	src	<< "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1706 		<< "{\n"
1707 		<< "	Outputs outputs[];\n"
1708 		<< "};\n"
1709 		<< "\n";
1710 }
1711 
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1712 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1713 {
1714 	std::string	tname;
1715 	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1716 	{
1717 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1718 		if (f16BitTest)
1719 		{
1720 			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1721 		}
1722 		else
1723 		{
1724 			tname = glu::getDataTypeName(symIter->varType.getBasicType());
1725 		}
1726 		src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1727 	}
1728 
1729 	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1730 	{
1731 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1732 		if (f16BitTest)
1733 		{
1734 			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1735 		}
1736 		else
1737 		{
1738 			tname = glu::getDataTypeName(symIter->varType.getBasicType());
1739 		}
1740 		src << "\t" << tname << " " << symIter->name << ";\n";
1741 		if (f16BitTest)
1742 		{
1743 			const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1744 			src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1745 		}
1746 	}
1747 
1748 	src << "\n";
1749 
1750 	{
1751 		std::istringstream	opSrc	(spec.source);
1752 		std::string			line;
1753 
1754 		while (std::getline(opSrc, line))
1755 			src << "\t" << line << "\n";
1756 	}
1757 
1758 	if (spec.packFloat16Bit)
1759 		packFloat16Bit (src, spec.outputs);
1760 
1761 	src << "\n";
1762 	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1763 	{
1764 		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1765 		if(f16BitTest)
1766 			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1767 		else
1768 			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1769 	}
1770 }
1771 
// Copies numValues values of one symbol from the tightly packed source array
// into the std430 buffer layout. When packFloat16Bit is set, 32-bit floats are
// converted to float16 and stored in the low 16 bits of each 32-bit slot.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType		basicType		= varType.getBasicType();
		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
		const int				numComps		= scalarSize / numVecs;	// components per (column) vector
		// Per-scalar size in bytes; loop-invariant.
		const int				size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				const int		srcOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
				const int		dstOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						// Zero-initialized second element supplies the upper 16 zero bits of each 32-bit slot.
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1812 
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1813 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1814 {
1815 	if (varType.isBasicType())
1816 	{
1817 		const glu::DataType		basicType		= varType.getBasicType();
1818 		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1819 		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1820 		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1821 		const int				numComps		= scalarSize / numVecs;
1822 
1823 		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1824 		{
1825 			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1826 			{
1827 				const int		size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1828 				const int		srcOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1829 				const int		dstOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
1830 				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1831 				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1832 
1833 				deMemcpy(dstPtr, srcPtr, size * numComps);
1834 			}
1835 		}
1836 	}
1837 	else
1838 		throw tcu::InternalError("Unsupported type");
1839 }
1840 
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1841 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1842 {
1843 	const VkDevice			vkDevice			= m_context.getDevice();
1844 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1845 
1846 	const deUint32			inputStride			= getLayoutStride(m_inputLayout);
1847 	const int				inputBufferSize		= inputStride * numValues;
1848 
1849 	if (inputBufferSize == 0)
1850 		return; // No inputs
1851 
1852 	DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1853 	for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1854 	{
1855 		const glu::VarType&		varType		= m_shaderSpec.inputs[inputNdx].varType;
1856 		const VarLayout&		layout		= m_inputLayout[inputNdx];
1857 
1858 		copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1859 	}
1860 
1861 	flushAlloc(vk, vkDevice, *m_inputAlloc);
1862 }
1863 
readOutputBuffer(void * const * outputPtrs,int numValues)1864 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1865 {
1866 	const VkDevice			vkDevice			= m_context.getDevice();
1867 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1868 
1869 	DE_ASSERT(numValues > 0); // At least some outputs are required.
1870 
1871 	invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1872 
1873 	DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1874 	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1875 	{
1876 		const glu::VarType&		varType		= m_shaderSpec.outputs[outputNdx].varType;
1877 		const VarLayout&		layout		= m_outputLayout[outputNdx];
1878 
1879 		copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1880 	}
1881 }
1882 
initBuffers(int numValues)1883 void BufferIoExecutor::initBuffers (int numValues)
1884 {
1885 	const deUint32				inputStride			= getLayoutStride(m_inputLayout);
1886 	const deUint32				outputStride		= getLayoutStride(m_outputLayout);
1887 	// Avoid creating zero-sized buffer/memory
1888 	const size_t				inputBufferSize		= de::max(numValues * inputStride, 1u);
1889 	const size_t				outputBufferSize	= numValues * outputStride;
1890 
1891 	// Upload data to buffer
1892 	const VkDevice				vkDevice			= m_context.getDevice();
1893 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
1894 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1895 	Allocator&					memAlloc			= m_context.getDefaultAllocator();
1896 
1897 	const VkBufferCreateInfo inputBufferParams =
1898 	{
1899 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1900 		DE_NULL,									// const void*			pNext;
1901 		0u,											// VkBufferCreateFlags	flags;
1902 		inputBufferSize,							// VkDeviceSize			size;
1903 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1904 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1905 		1u,											// deUint32				queueFamilyCount;
1906 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1907 	};
1908 
1909 	m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1910 	m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1911 
1912 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1913 
1914 	const VkBufferCreateInfo outputBufferParams =
1915 	{
1916 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1917 		DE_NULL,									// const void*			pNext;
1918 		0u,											// VkBufferCreateFlags	flags;
1919 		outputBufferSize,							// VkDeviceSize			size;
1920 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1921 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1922 		1u,											// deUint32				queueFamilyCount;
1923 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1924 	};
1925 
1926 	m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1927 	m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1928 
1929 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1930 }
1931 
1932 // ComputeShaderExecutor
1933 
// Executes the shader-spec expressions in a compute shader; inputs/outputs go
// through the SSBOs managed by BufferIoExecutor.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~ComputeShaderExecutor	(void);

	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateComputeShader	(const ShaderSpec& spec);

private:
	// Caller-provided layout for extra resources; stored, not owned.
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
1950 
// Stores the extra-resources layout and defers buffer setup to BufferIoExecutor.
ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
1956 
// Nothing to release beyond what the base class and RAII members handle.
ComputeShaderExecutor::~ComputeShaderExecutor	(void)
{
}
1960 
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1961 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1962 {
1963 	switch(type)
1964 	{
1965 	case glu::TYPE_FLOAT16:
1966 		return "%f16";
1967 	case glu::TYPE_FLOAT16_VEC2:
1968 		return "%v2f16";
1969 	case glu::TYPE_FLOAT16_VEC3:
1970 		return "%v3f16";
1971 	case glu::TYPE_FLOAT16_VEC4:
1972 		return "%v4f16";
1973 	case glu::TYPE_FLOAT:
1974 		return packFloat16Bit ? "%u32" : "%f32";		// f16 values will be bitcast from ui32.
1975 	case glu::TYPE_FLOAT_VEC2:
1976 		return packFloat16Bit ? "%v2u32" : "%v2f32";	// f16 values will be bitcast from ui32.
1977 	case glu::TYPE_FLOAT_VEC3:
1978 		return packFloat16Bit ? "%v3u32" : "%v3f32";	// f16 values will be bitcast from ui32.
1979 	case glu::TYPE_FLOAT_VEC4:
1980 		return packFloat16Bit ? "%v4u32" : "%v4f32";	// f16 values will be bitcast from ui32.
1981 	case glu::TYPE_INT:
1982 		return "%i32";
1983 	case glu::TYPE_INT_VEC2:
1984 		return "%v2i32";
1985 	case glu::TYPE_INT_VEC3:
1986 		return "%v3i32";
1987 	case glu::TYPE_INT_VEC4:
1988 		return "%v4i32";
1989 	case glu::TYPE_DOUBLE:
1990 		return "%f64";
1991 	case glu::TYPE_DOUBLE_VEC2:
1992 		return "%v2f64";
1993 	case glu::TYPE_DOUBLE_VEC3:
1994 		return "%v3f64";
1995 	case glu::TYPE_DOUBLE_VEC4:
1996 		return "%v4f64";
1997 	default:
1998 		DE_ASSERT(0);
1999 		return "";
2000 	}
2001 }
2002 
// Emits SPIR-V that shifts the i32 counter held in 'variableName' one bit to
// the left (multiplies it by two). 'operationNdx' keeps the generated result
// ids unique so the snippet can be emitted repeatedly.
// Note: variableName is now taken by const reference to avoid a needless
// std::string copy per call (interface stays call-compatible).
std::string moveBitOperation (const std::string& variableName, const int operationNdx)
{
	std::ostringstream	src;
	src << "\n"
	<< "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
	<< "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_"<< operationNdx <<" %c_i32_1\n"
	<< "OpStore " << variableName << " %move1_" << operationNdx << "\n";
	return src.str();
}
2012 
// Emits SPIR-V that applies the float comparison 'operation' to %in0_val and
// %in1_val and accumulates the current operation bit (the i32 in %operation)
// into %out0 for every component that compared true. 'outputType' is a SPIR-V
// type-id name such as "%v2i32"; 'scalarSize' is its component count;
// 'operationNdx' keeps the generated ids unique.
std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
{
	std::ostringstream	src;
	std::string			boolType;

	switch (type)
	{
	// Scalar case: add the operation bit to %out0 inside a conditional branch.
	case glu::TYPE_FLOAT16:
	case glu::TYPE_FLOAT:
	case glu::TYPE_DOUBLE:
		src << "\n"
			<< "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
			<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
			<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
			<< "%label_IF_" << operationNdx << " = OpLabel\n"
			<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
			<< "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
			<< "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
			<< "OpStore %out0 %add_if_" << operationNdx << "\n"
			<< "OpBranch %IF_" << operationNdx << "\n"
			<< "%IF_" << operationNdx << " = OpLabel\n";
		return src.str();
	case glu::TYPE_FLOAT16_VEC2:
	case glu::TYPE_FLOAT_VEC2:
	case glu::TYPE_DOUBLE_VEC2:
		boolType = "%v2bool";
		break;
	case glu::TYPE_FLOAT16_VEC3:
	case glu::TYPE_FLOAT_VEC3:
	case glu::TYPE_DOUBLE_VEC3:
		boolType = "%v3bool";
		break;
	case glu::TYPE_FLOAT16_VEC4:
	case glu::TYPE_FLOAT_VEC4:
	case glu::TYPE_DOUBLE_VEC4:
		boolType = "%v4bool";
		break;
	default:
		DE_ASSERT(0);
		return "";
	}

	// Vector case (branchless): per-component OpSelect of 1/0, multiplied by
	// the operation value and added to %out0.
	// &outputType[1] strips the leading '%' to form constant names like %c_v2i32_1.
	src << "\n"
		<< "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
		<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
		<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";

	// Broadcast the scalar operation value into a vector of 'scalarSize' components.
	src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
	for(int ndx = 0; ndx < scalarSize; ++ndx)
		src << " %operation_val_" << operationNdx;
	src << "\n";

	src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
		<< "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"

		<< "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
		<< "OpStore %out0 %add_if_" << operationNdx << "\n";

	return src.str();
}
2073 
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2074 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2075 {
2076 	static const std::string COMPARE_OPERATIONS[] =
2077 	{
2078 		"OpFOrdEqual",
2079 		"OpFOrdGreaterThan",
2080 		"OpFOrdLessThan",
2081 		"OpFOrdGreaterThanEqual",
2082 		"OpFOrdLessThanEqual",
2083 		"OpFUnordEqual",
2084 		"OpFUnordGreaterThan",
2085 		"OpFUnordLessThan",
2086 		"OpFUnordGreaterThanEqual",
2087 		"OpFUnordLessThanEqual"
2088 	};
2089 
2090 	int					moveBitNdx		= 0;
2091 	vector<std::string>	inputTypes;
2092 	vector<std::string>	outputTypes;
2093 	const std::string	packType		= spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2094 
2095 	vector<bool>		floatResult;
2096 	for (const auto& symbol : spec.outputs)
2097 		floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2098 
2099 	const bool			anyFloatResult	= std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2100 
2101 	vector<bool>		packFloatRes;
2102 	for (const auto& floatRes : floatResult)
2103 		packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2104 
2105 	const bool			useF32Types		= (!are16Bit && !are64Bit);
2106 	const bool			useF64Types		= are64Bit;
2107 	const bool			useF16Types		= (spec.packFloat16Bit || are16Bit);
2108 
2109 	for (const auto& symbol : spec.inputs)
2110 		inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2111 
2112 	for (const auto& symbol : spec.outputs)
2113 		outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2114 
2115 	DE_ASSERT(!inputTypes.empty());
2116 	DE_ASSERT(!outputTypes.empty());
2117 
2118 	// Assert input and output types match the expected operations.
2119 	switch (spec.spirvCase)
2120 	{
2121 	case SPIRV_CASETYPE_COMPARE:
2122 	case SPIRV_CASETYPE_FREM:
2123 		DE_ASSERT(inputTypes.size() == 2);
2124 		DE_ASSERT(outputTypes.size() == 1);
2125 		break;
2126 	case SPIRV_CASETYPE_MODFSTRUCT:
2127 	case SPIRV_CASETYPE_FREXPSTRUCT:
2128 		DE_ASSERT(inputTypes.size() == 1);
2129 		DE_ASSERT(outputTypes.size() == 2);
2130 		break;
2131 	default:
2132 		DE_ASSERT(false);
2133 		break;
2134 	}
2135 
2136 	std::ostringstream	src;
2137 	src << "; SPIR-V\n"
2138 		"; Version: 1.0\n"
2139 		"; Generator: Khronos Glslang Reference Front End; 4\n"
2140 		"; Bound: 114\n"
2141 		"; Schema: 0\n"
2142 		"OpCapability Shader\n";
2143 
2144 	if (useF16Types)
2145 		src << "OpCapability Float16\n";
2146 
2147 	if (are16Bit)
2148 		src << "OpCapability StorageBuffer16BitAccess\n"
2149 			"OpCapability UniformAndStorageBuffer16BitAccess\n";
2150 
2151 	if (useF64Types)
2152 		src << "OpCapability Float64\n";
2153 
2154 	if (are16Bit)
2155 		src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2156 
2157 	src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2158 		"OpMemoryModel Logical GLSL450\n"
2159 		"OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2160 		"OpExecutionMode %BP_main LocalSize 1 1 1\n"
2161 		"OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2162 		"OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2163 
2164 	// Input offsets and stride.
2165 	{
2166 		int offset	= 0;
2167 		int ndx		= 0;
2168 		int largest	= 0;
2169 		for (const auto& symbol : spec.inputs)
2170 		{
2171 			const int scalarSize		= symbol.varType.getScalarSize();
2172 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2173 			const int extraMemberBytes	= (offset % memberSize);
2174 
2175 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2176 			src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2177 			++ndx;
2178 
2179 			if (memberSize > largest)
2180 				largest = memberSize;
2181 
2182 			offset += memberSize;
2183 		}
2184 		DE_ASSERT(largest > 0);
2185 		const int extraBytes	= (offset % largest);
2186 		const int stride		= offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2187 		src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2188 	}
2189 
2190 	src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2191 		"OpDecorate %ssboIN BufferBlock\n"
2192 		"OpDecorate %ssbo_src DescriptorSet 0\n"
2193 		"OpDecorate %ssbo_src Binding 0\n"
2194 		"\n";
2195 
2196 	if (isMediump)
2197 	{
2198 		for (size_t i = 0; i < inputTypes.size(); ++i)
2199 		{
2200 			src <<
2201 				"OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2202 				"OpDecorate %in" << i << " RelaxedPrecision\n"
2203 				"OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2204 				"OpDecorate %in" << i << "_val RelaxedPrecision\n"
2205 				;
2206 		}
2207 
2208 			if (anyFloatResult)
2209 			{
2210 				switch (spec.spirvCase)
2211 				{
2212 				case SPIRV_CASETYPE_FREM:
2213 					src << "OpDecorate %frem_result RelaxedPrecision\n";
2214 					break;
2215 				case SPIRV_CASETYPE_MODFSTRUCT:
2216 					src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2217 					break;
2218 				case SPIRV_CASETYPE_FREXPSTRUCT:
2219 					src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2220 					break;
2221 				default:
2222 					DE_ASSERT(false);
2223 					break;
2224 				}
2225 
2226 				for (size_t i = 0; i < outputTypes.size(); ++i)
2227 				{
2228 					src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2229 					src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2230 					src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2231 				}
2232 			}
2233 	}
2234 
2235 	// Output offsets and stride.
2236 	{
2237 		int offset	= 0;
2238 		int ndx		= 0;
2239 		int largest	= 0;
2240 		for (const auto& symbol : spec.outputs)
2241 		{
2242 			const int scalarSize		= symbol.varType.getScalarSize();
2243 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2244 			const int extraMemberBytes	= (offset % memberSize);
2245 
2246 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2247 			src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2248 			++ndx;
2249 
2250 			if (memberSize > largest)
2251 				largest = memberSize;
2252 
2253 			offset += memberSize;
2254 		}
2255 		DE_ASSERT(largest > 0);
2256 		const int extraBytes	= (offset % largest);
2257 		const int stride		= offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2258 		src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2259 	}
2260 
2261 	src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2262 		"OpDecorate %ssboOUT BufferBlock\n"
2263 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
2264 		"OpDecorate %ssbo_dst Binding 1\n"
2265 		"\n"
2266 		"%void  = OpTypeVoid\n"
2267 		"%bool  = OpTypeBool\n"
2268 		"%v2bool = OpTypeVector %bool 2\n"
2269 		"%v3bool = OpTypeVector %bool 3\n"
2270 		"%v4bool = OpTypeVector %bool 4\n"
2271 		"%u32   = OpTypeInt 32 0\n";
2272 
2273 	if (useF32Types)
2274 		src << "%f32   = OpTypeFloat 32\n"
2275 			"%v2f32 = OpTypeVector %f32 2\n"
2276 			"%v3f32 = OpTypeVector %f32 3\n"
2277 			"%v4f32 = OpTypeVector %f32 4\n";
2278 
2279 	if (useF64Types)
2280 		src << "%f64   = OpTypeFloat 64\n"
2281 			"%v2f64 = OpTypeVector %f64 2\n"
2282 			"%v3f64 = OpTypeVector %f64 3\n"
2283 			"%v4f64 = OpTypeVector %f64 4\n";
2284 
2285 	if (useF16Types)
2286 		src << "%f16   = OpTypeFloat 16\n"
2287 			"%v2f16 = OpTypeVector %f16 2\n"
2288 			"%v3f16 = OpTypeVector %f16 3\n"
2289 			"%v4f16 = OpTypeVector %f16 4\n";
2290 
2291 	src << "%i32   = OpTypeInt 32 1\n"
2292 		"%v2i32 = OpTypeVector %i32 2\n"
2293 		"%v3i32 = OpTypeVector %i32 3\n"
2294 		"%v4i32 = OpTypeVector %i32 4\n"
2295 		"%v2u32 = OpTypeVector %u32 2\n"
2296 		"%v3u32 = OpTypeVector %u32 3\n"
2297 		"%v4u32 = OpTypeVector %u32 4\n"
2298 		"\n"
2299 		"%ip_u32   = OpTypePointer Input %u32\n"
2300 		"%ip_v3u32 = OpTypePointer Input %v3u32\n"
2301 		"%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2302 		"\n"
2303 		"%fp_operation = OpTypePointer Function %i32\n"
2304 		"%voidf        = OpTypeFunction %void\n"
2305 		"%fp_u32       = OpTypePointer Function %u32\n"
2306 		"%fp_it1       = OpTypePointer Function " << inputTypes[0] << "\n"
2307 		;
2308 
2309 	for (size_t i = 0; i < outputTypes.size(); ++i)
2310 	{
2311 		src << "%fp_out_" << i << "     = OpTypePointer Function " << outputTypes[i] << "\n"
2312 			<< "%up_out_" << i << "     = OpTypePointer Uniform " << outputTypes[i] << "\n";
2313 	}
2314 
2315 	if (spec.packFloat16Bit)
2316 		src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2317 
2318 	src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2319 		"%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2320 		"\n"
2321 		"%c_u32_0 = OpConstant %u32 0\n"
2322 		"%c_u32_1 = OpConstant %u32 1\n"
2323 		"%c_u32_2 = OpConstant %u32 2\n"
2324 		"%c_i32_0 = OpConstant %i32 0\n"
2325 		"%c_i32_1 = OpConstant %i32 1\n"
2326 		"\n";
2327 
2328 	if (useF32Types)
2329 		src <<
2330 			"%c_f32_0 = OpConstant %f32 0\n"
2331 			"%c_f32_1 = OpConstant %f32 1\n"
2332 			;
2333 
2334 	if (useF16Types)
2335 		src <<
2336 			"%c_f16_0 = OpConstant %f16 0\n"
2337 			"%c_f16_1 = OpConstant %f16 1\n"
2338 			"%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2339 			;
2340 
2341 	if (useF64Types)
2342 		src <<
2343 			"%c_f64_0 = OpConstant %f64 0\n"
2344 			"%c_f64_1 = OpConstant %f64 1\n"
2345 		;
2346 
2347 	src << "\n"
2348 		"%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2349 		"%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2350 		"%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2351 		"%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2352 		"%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2353 		"%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2354 		"\n";
2355 
2356 	if (useF32Types)
2357 		src <<
2358 			"%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2359 			"%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2360 			"%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2361 			"%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2362 			"%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2363 			"%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2364 			;
2365 
2366 	if (useF16Types)
2367 		src <<
2368 			"%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2369 			"%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2370 			"%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2371 			"%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2372 			"%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2373 			"%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2374 			;
2375 
2376 	if (useF64Types)
2377 		src <<
2378 			"%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2379 			"%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2380 			"%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2381 			"%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2382 			"%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2383 			"%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2384 			"\n";
2385 
2386 	// Input struct.
2387 	{
2388 		src << "%SSB0_IN    = OpTypeStruct";
2389 		for (const auto& t : inputTypes)
2390 			src << " " << t;
2391 		src << "\n";
2392 	}
2393 
2394 	src <<
2395 		"%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2396 		"%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2397 		"%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2398 		"%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2399 		"\n";
2400 
2401 	// Output struct.
2402 	{
2403 		src << "%SSB0_OUT    = OpTypeStruct";
2404 		for (const auto& t : outputTypes)
2405 			src << " " << t;
2406 		src << "\n";
2407 	}
2408 
2409 	std::string modfStructMemberType;
2410 	std::string frexpStructFirstMemberType;
2411 	if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2412 	{
2413 		modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2414 		src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2415 	}
2416 	else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2417 	{
2418 		frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2419 		src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2420 	}
2421 
2422 	src <<
2423 		"%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2424 		"%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2425 		"%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2426 		"%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2427 		"\n"
2428 		"%BP_main = OpFunction %void None %voidf\n"
2429 		"%BP_label = OpLabel\n"
2430 		"%invocationNdx = OpVariable %fp_u32 Function\n";
2431 
2432 	// Note: here we are supposing all inputs have the same type.
2433 	for (size_t i = 0; i < inputTypes.size(); ++i)
2434 		src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2435 
2436 	for (size_t i = 0; i < outputTypes.size(); ++i)
2437 		src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2438 
2439 	src << "%operation = OpVariable %fp_operation Function\n"
2440 		"%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2441 		"%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2442 		"%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2443 		"%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2444 		"%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2445 		"%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2446 		"%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2447 		"%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2448 		"%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2449 		"%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2450 		"\n"
2451 		"%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2452 		"%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2453 		"%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2454 		"%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2455 		"%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2456 		"OpStore %invocationNdx %add_2\n"
2457 		"%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2458 
2459 	// Load input values.
2460 	for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2461 	{
2462 		src << "\n"
2463 			<< "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2464 			<< "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2465 
2466 		if (spec.packFloat16Bit)
2467 		{
2468 			if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2469 			{
2470 				// Extract the val<inputNdx> u32 input channels into individual f16 values.
2471 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2472 				{
2473 					src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2474 						"%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2475 						"%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2476 				}
2477 
2478 				// Construct the input vector.
2479 				src << "%val_f16_0_" << inputNdx << "   = OpCompositeConstruct " << packType;
2480 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2481 				{
2482 					src << " %val_f16_0_" << inputNdx << "_" << i;
2483 				}
2484 
2485 				src << "\n";
2486 				src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2487 			}
2488 			else
2489 			{
2490 				src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2491 					"%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2492 
2493 				src <<	"OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2494 			}
2495 		}
2496 		else
2497 			src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2498 
2499 		src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2500 	}
2501 
2502 	src << "\n"
2503 		"OpStore %operation %c_i32_1\n";
2504 
2505 	// Fill output values with dummy data.
2506 	for (size_t i = 0; i < outputTypes.size(); ++i)
2507 		src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2508 
2509 	src << "\n";
2510 
2511 	// Run operation.
2512 	switch (spec.spirvCase)
2513 	{
2514 	case SPIRV_CASETYPE_COMPARE:
2515 		for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2516 		{
2517 			src << scalarComparison	(COMPARE_OPERATIONS[operationNdx], operationNdx,
2518 									spec.inputs[0].varType.getBasicType(),
2519 									outputTypes[0],
2520 									spec.outputs[0].varType.getScalarSize());
2521 			src << moveBitOperation("%operation", moveBitNdx);
2522 			++moveBitNdx;
2523 		}
2524 		break;
2525 	case SPIRV_CASETYPE_FREM:
2526 		src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2527 			<< "OpStore %out0 %frem_result\n";
2528 		break;
2529 	case SPIRV_CASETYPE_MODFSTRUCT:
2530 		src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2531 			<< "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2532 			<< "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2533 			<< "OpStore %out0 %modfstruct_result_0\n"
2534 			<< "OpStore %out1 %modfstruct_result_1\n";
2535 		break;
2536 	case SPIRV_CASETYPE_FREXPSTRUCT:
2537 		src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2538 			<< "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2539 			<< "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2540 			<< "OpStore %out0 %frexpstruct_result_0\n"
2541 			<< "OpStore %out1 %frexpstruct_result_1\n";
2542 		break;
2543 	default:
2544 		DE_ASSERT(false);
2545 		break;
2546 	}
2547 
2548 	for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2549 	{
2550 		src << "\n"
2551 			"%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2552 			"%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2553 
2554 		if (packFloatRes[outputNdx])
2555 		{
2556 			if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2557 			{
2558 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2559 				{
2560 					src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2561 					src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2562 					src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2563 				}
2564 
2565 				src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2566 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2567 					src << " %u32_val_" << outputNdx << "_" << i;
2568 				src << "\n";
2569 				src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2570 			}
2571 			else
2572 			{
2573 				src <<
2574 					"%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2575 					"%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2576 					"OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2577 			}
2578 		}
2579 		else
2580 		{
2581 			src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2582 		}
2583 	}
2584 
2585 	src << "\n"
2586 		"OpReturn\n"
2587 		"OpFunctionEnd\n";
2588 
2589 	return src.str();
2590 }
2591 
2592 
generateComputeShader(const ShaderSpec & spec)2593 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2594 {
2595 	if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2596 	{
2597 		bool	are16Bit	= false;
2598 		bool	are64Bit	= false;
2599 		bool	isMediump	= false;
2600 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2601 		{
2602 			if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2603 				are16Bit = true;
2604 
2605 			if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2606 				are64Bit = true;
2607 
2608 			if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2609 				isMediump = true;
2610 
2611 			if (isMediump && are16Bit)
2612 				break;
2613 		}
2614 
2615 		return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2616 	}
2617 	else
2618 	{
2619 		std::ostringstream src;
2620 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2621 
2622 		if (!spec.globalDeclarations.empty())
2623 			src << spec.globalDeclarations << "\n";
2624 
2625 		src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2626 			<< "\n";
2627 
2628 		declareBufferBlocks(src, spec);
2629 
2630 		src << "void main (void)\n"
2631 			<< "{\n"
2632 			<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2633 			<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2634 
2635 		generateExecBufferIo(src, spec, "invocationNdx");
2636 
2637 		src << "}\n";
2638 
2639 		return src.str();
2640 	}
2641 }
2642 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2643 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2644 {
2645 	if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2646 		programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2647 	else
2648 		programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2649 }
2650 
// Executes the compute shader for 'numValues' values: uploads 'inputs' to the
// input SSBO, builds a pipeline and descriptor set over the input/output
// buffers, dispatches in batches bounded by maxComputeWorkGroupSize[0]
// (re-pointing the descriptors at each batch's buffer sub-range) and finally
// reads the results back into 'outputs'.
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	// Set 1 holds caller-provided extra resources, when a layout was given.
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// Extra resources layout and descriptor set must be provided together.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create descriptor set layout and pool: two storage buffers (input and
	// output), both visible to the compute stage.

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout			descriptorSetLayouts[]	=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
				DE_NULL,													// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,						// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,								// VkShaderStageFlagBits				stage;
				*computeShaderModule,										// VkShaderModule						shader;
				"main",														// const char*							pName;
				DE_NULL														// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			*shaderStageParams,									// VkPipelineShaderStageCreateInfo					cs;
			*pipelineLayout,									// VkPipelineLayout									layout;
			0u,													// VkPipeline										basePipelineHandle;
			0u,													// int32_t											basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// One value per workgroup along X, so batch size is capped by the device's
	// maximum workgroup count dimension... the shader runs LocalSize 1 1 1 and
	// indexes by workgroup id.
	const int			maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset				= 0;
	const deUint32		inputStride				= getInputStride();
	const deUint32		outputStride			= getOutputStride();

	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>	cmdBuffer;
		const int				numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors to point at this batch's slice of the buffers
		// (offsets/ranges are in bytes).
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// The input binding is left unwritten when there are no inputs.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			// extraResources is a null handle when numDescriptorSets == 1, so
			// only the first array entry is consumed in that case.
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								0, (const VkMemoryBarrier*)DE_NULL,
								1, &bufferBarrier,
								0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute and wait for the batch to finish before reusing the descriptor set.
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2840 
2841 // Tessellation utils
2842 
// Returns GLSL source for a minimal vertex shader used as a passthrough
// stage by the tessellation executors. The position is derived solely from
// gl_VertexIndex (integer div/mod), so no vertex attributes are required.
static std::string generateVertexShaderForTess (void)
{
	std::ostringstream	shaderSrc;

	shaderSrc << "#version 450\n";
	shaderSrc << "void main (void)\n{\n";
	shaderSrc << "\tgl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n";
	shaderSrc << "}\n";

	return shaderSrc.str();
}
2853 
// Common base class for the tessellation-stage executors (control and
// evaluation). Provides renderTess(), which draws patch primitives through a
// full graphics pipeline so that values are computed in the tessellation
// stages and written to the output SSBO managed by BufferIoExecutor.
class TessellationExecutor : public BufferIoExecutor
{
public:
					TessellationExecutor		(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual			~TessellationExecutor		(void);

	// Draws 'vertexCount' vertices as patches of 'patchControlPoints' control
	// points, binding 'extraResources' as a second descriptor set when a
	// layout was supplied at construction; blocks until execution completes.
	void			renderTess					(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Layout for the optional second (caller-owned) descriptor set; 0 if unused.
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
2865 
TessellationExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)2866 TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
2867 	: BufferIoExecutor			(context, shaderSpec)
2868 	, m_extraResourcesLayout	(extraResourcesLayout)
2869 {
2870 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
2871 
2872 	if (!features.tessellationShader)
2873 		TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
2874 }
2875 
// Trivial destructor; owned Vulkan objects are released by the members' and
// base class' destructors (RAII Move<> wrappers).
TessellationExecutor::~TessellationExecutor (void)
{
}
2879 
// Renders 'vertexCount' vertices as patch primitives through a full graphics
// pipeline (vertex -> tess control -> tess eval -> fragment) and waits for
// completion. The tessellation shaders exchange values via the SSBOs owned by
// BufferIoExecutor (m_inputBuffer / m_outputBuffer); the color attachment only
// satisfies the render pass requirements and is never read back.
//
// numValues          - number of input/output values processed by the shaders
// vertexCount        - number of vertices to draw (patch-list topology)
// patchControlPoints - control points per patch for the pipeline state
// extraResources     - optional second descriptor set (must match the layout
//                      given at construction; both zero or both non-zero)
void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
{
	const size_t						inputBufferSize				= numValues * getInputStride();
	const VkDevice						vkDevice					= m_context.getDevice();
	const DeviceInterface&				vk							= m_context.getDeviceInterface();
	const VkQueue						queue						= m_context.getUniversalQueue();
	const deUint32						queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
	Allocator&							memAlloc					= m_context.getDefaultAllocator();

	const tcu::UVec2					renderSize					(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

	Move<VkImage>						colorImage;
	de::MovePtr<Allocation>				colorImageAlloc;
	VkFormat							colorFormat					= VK_FORMAT_R8G8B8A8_UNORM;
	Move<VkImageView>					colorImageView;

	Move<VkRenderPass>					renderPass;
	Move<VkFramebuffer>					framebuffer;
	Move<VkPipelineLayout>				pipelineLayout;
	Move<VkPipeline>					graphicsPipeline;

	Move<VkShaderModule>				vertexShaderModule;
	Move<VkShaderModule>				tessControlShaderModule;
	Move<VkShaderModule>				tessEvalShaderModule;
	Move<VkShaderModule>				fragmentShaderModule;

	Move<VkCommandPool>					cmdPool;
	Move<VkCommandBuffer>				cmdBuffer;

	Move<VkDescriptorPool>				descriptorPool;
	Move<VkDescriptorSetLayout>			descriptorSetLayout;
	Move<VkDescriptorSet>				descriptorSet;
	// A second set is bound only when an extra-resources layout was supplied.
	const deUint32						numDescriptorSets			= (m_extraResourcesLayout != 0) ? 2u : 1u;

	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create color image
	{
		const VkImageCreateInfo colorImageParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType			sType;
			DE_NULL,																	// const void*				pNext;
			0u,																			// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,															// VkImageType				imageType;
			colorFormat,																// VkFormat					format;
			{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D				extent;
			1u,																			// deUint32					mipLevels;
			1u,																			// deUint32					arraySize;
			VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling			tiling;
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode			sharingMode;
			1u,																			// deUint32					queueFamilyCount;
			&queueFamilyIndex,															// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED													// VkImageLayout			initialLayout;
		};

		colorImage = createImage(vk, vkDevice, &colorImageParams);

		// Allocate and bind color image memory
		colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
		VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
	}

	// Create color attachment view
	{
		const VkImageViewCreateInfo colorImageViewParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
			DE_NULL,											// const void*				pNext;
			0u,													// VkImageViewCreateFlags	flags;
			*colorImage,										// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
			colorFormat,										// VkFormat					format;
			{
				VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
				VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
				VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
				VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
			},													// VkComponentsMapping		components;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
				0u,												// deUint32					baseMipLevel;
				1u,												// deUint32					mipLevels;
				0u,												// deUint32					baseArraylayer;
				1u												// deUint32					layerCount;
			}													// VkImageSubresourceRange	subresourceRange;
		};

		colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
	}

	// Create render pass
	{
		const VkAttachmentDescription colorAttachmentDescription =
		{
			0u,													// VkAttachmentDescriptorFlags	flags;
			colorFormat,										// VkFormat						format;
			VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits		samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR,						// VkAttachmentLoadOp			loadOp;
			VK_ATTACHMENT_STORE_OP_STORE,						// VkAttachmentStoreOp			storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE,					// VkAttachmentLoadOp			stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE,					// VkAttachmentStoreOp			stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout				initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout				finalLayout
		};

		const VkAttachmentDescription attachments[1] =
		{
			colorAttachmentDescription
		};

		const VkAttachmentReference colorAttachmentReference =
		{
			0u,													// deUint32			attachment;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout	layout;
		};

		const VkSubpassDescription subpassDescription =
		{
			0u,													// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
			0u,													// deUint32						inputCount;
			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
			1u,													// deUint32						colorCount;
			&colorAttachmentReference,							// const VkAttachmentReference*	pColorAttachments;
			DE_NULL,											// const VkAttachmentReference*	pResolveAttachments;
			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
			0u,													// deUint32						preserveCount;
			DE_NULL												// const VkAttachmentReference* pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
			DE_NULL,											// const void*						pNext;
			0u,													// VkRenderPassCreateFlags			flags;
			1u,													// deUint32							attachmentCount;
			attachments,										// const VkAttachmentDescription*	pAttachments;
			1u,													// deUint32							subpassCount;
			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
			0u,													// deUint32							dependencyCount;
			DE_NULL												// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			0u,													// VkFramebufferCreateFlags		flags;
			*renderPass,										// VkRenderPass					renderPass;
			1u,													// deUint32						attachmentCount;
			&*colorImageView,									// const VkAttachmentBindInfo*	pAttachments;
			(deUint32)renderSize.x(),							// deUint32						width;
			(deUint32)renderSize.y(),							// deUint32						height;
			1u													// deUint32						layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create descriptors
	{
		DescriptorPoolBuilder		descriptorPoolBuilder;
		DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

		// Two storage-buffer bindings: output SSBO and (optionally written) input SSBO.
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
		descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		const VkDescriptorSetAllocateInfo allocInfo =
		{
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			DE_NULL,
			*descriptorPool,
			1u,
			&*descriptorSetLayout
		};

		descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				0u,								// VkDeviceSize		offset;
				VK_WHOLE_SIZE					// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			VkDescriptorBufferInfo inputDescriptorBufferInfo =
			{
				0,							// VkBuffer			buffer;
				0u,							// VkDeviceSize		offset;
				VK_WHOLE_SIZE				// VkDeviceSize		range;
			};

			// The input binding is only written when there is input data to read.
			if (inputBufferSize > 0)
			{
				inputDescriptorBufferInfo.buffer = *m_inputBuffer;

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}
	}

	// Create pipeline layout
	{
		const VkDescriptorSetLayout			descriptorSetLayouts[]		=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						descriptorSetCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shader modules
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		tessControlShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
		tessEvalShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
	}

	// Create pipeline
	{
		// No vertex input bindings/attributes: positions come from gl_VertexIndex.
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags	flags;
			0u,																// deUint32									bindingCount;
			DE_NULL,														// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,																// deUint32									attributeCount;
			DE_NULL,														// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&                        vk
												vkDevice,							// const VkDevice                                device
												*pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
												*vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
												*tessControlShaderModule,			// const VkShaderModule                          tessellationControlShaderModule
												*tessEvalShaderModule,				// const VkShaderModule                          tessellationEvalShaderModule
												DE_NULL,							// const VkShaderModule                          geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule                          fragmentShaderModule
												*renderPass,						// const VkRenderPass                            renderPass
												viewports,							// const std::vector<VkViewport>&                viewports
												scissors,							// const std::vector<VkRect2D>&                  scissors
												VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,	// const VkPrimitiveTopology                     topology
												0u,									// const deUint32                                subpass
												patchControlPoints,					// const deUint32                                patchControlPoints
												&vertexInputStateParams);			// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		const VkClearValue clearValue = getDefaultClearColor();

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		{
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

		endRenderPass(vk, *cmdBuffer);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
3212 
3213 // TessControlExecutor
3214 
// Executor that evaluates the shader-under-test in the tessellation control
// stage: one patch per value, indexed by gl_PrimitiveID, with a trivial
// passthrough evaluation stage.
class TessControlExecutor : public TessellationExecutor
{
public:
						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessControlExecutor		(void);

	// Adds vert/tess_control/tess_eval/frag GLSL sources to the program collection.
	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Runs the shader for numValues values; inputs/outputs are tightly packed arrays.
	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
};
3228 
// Forwards to TessellationExecutor, which checks tessellation support.
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3233 
// Trivial destructor.
TessControlExecutor::~TessControlExecutor (void)
{
}
3237 
generateTessControlShader(const ShaderSpec & shaderSpec)3238 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3239 {
3240 	std::ostringstream src;
3241 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3242 
3243 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3244 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3245 
3246 	if (!shaderSpec.globalDeclarations.empty())
3247 		src << shaderSpec.globalDeclarations << "\n";
3248 
3249 	src << "\nlayout(vertices = 1) out;\n\n";
3250 
3251 	declareBufferBlocks(src, shaderSpec);
3252 
3253 	src << "void main (void)\n{\n";
3254 
3255 	for (int ndx = 0; ndx < 2; ndx++)
3256 		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3257 
3258 	for (int ndx = 0; ndx < 4; ndx++)
3259 		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3260 
3261 	src << "\n"
3262 		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3263 
3264 	generateExecBufferIo(src, shaderSpec, "invocationId");
3265 
3266 	src << "}\n";
3267 
3268 	return src.str();
3269 }
3270 
// Minimal tessellation evaluation shader: consumes triangle patches and
// produces a position from the tessellation coordinate. Used when only the
// control stage contains the shader-under-test.
static std::string generateEmptyTessEvalShader ()
{
	std::ostringstream out;

	out << "#version 450\n"
		   "#extension GL_EXT_tessellation_shader : require\n\n"
		<< "layout(triangles, ccw) in;\n"
		<< "\nvoid main (void)\n{\n"
		   "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
		   "}\n";

	return out.str();
}
3286 
// Registers the four pipeline stages: passthrough vertex shader, the control
// shader wrapping the shader-under-test, an empty evaluation shader and an
// empty fragment shader. Names must match the lookups in renderTess().
void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3294 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3295 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3296 {
3297 	const deUint32	patchSize	= 3;
3298 
3299 	initBuffers(numValues);
3300 
3301 	// Setup input buffer & copy data
3302 	uploadInputBuffer(inputs, numValues, false);
3303 
3304 	renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3305 
3306 	// Read back data
3307 	readOutputBuffer(outputs, numValues);
3308 }
3309 
3310 // TessEvaluationExecutor
3311 
// Executor that evaluates the shader-under-test in the tessellation
// evaluation stage: isoline patches, two invocations per patch, with a
// passthrough control stage.
class TessEvaluationExecutor : public TessellationExecutor
{
public:
						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessEvaluationExecutor	(void);

	// Adds vert/tess_control/tess_eval/frag GLSL sources to the program collection.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Runs the shader for numValues values; inputs/outputs are tightly packed arrays.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
};
3325 
// Forwards to TessellationExecutor, which checks tessellation support.
TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
{
}
3330 
// Trivial destructor.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3334 
// Tessellation control shader that performs no computation of its own: it
// just fixes all inner/outer tessellation levels to 1.0 so the evaluation
// stage (the stage under test) receives a fixed, minimal subdivision.
static std::string generatePassthroughTessControlShader (void)
{
	std::ostringstream out;

	out << "#version 450\n"
		   "#extension GL_EXT_tessellation_shader : require\n\n"
		<< "layout(vertices = 1) out;\n\n"
		<< "void main (void)\n{\n";

	out << "\tgl_TessLevelInner[0] = 1.0;\n"
		<< "\tgl_TessLevelInner[1] = 1.0;\n";

	out << "\tgl_TessLevelOuter[0] = 1.0;\n"
		<< "\tgl_TessLevelOuter[1] = 1.0;\n"
		<< "\tgl_TessLevelOuter[2] = 1.0;\n"
		<< "\tgl_TessLevelOuter[3] = 1.0;\n";

	out << "}\n";

	return out.str();
}
3356 
generateTessEvalShader(const ShaderSpec & shaderSpec)3357 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3358 {
3359 	std::ostringstream src;
3360 
3361 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3362 
3363 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3364 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3365 
3366 	if (!shaderSpec.globalDeclarations.empty())
3367 		src << shaderSpec.globalDeclarations << "\n";
3368 
3369 	src << "\n";
3370 
3371 	src << "layout(isolines, equal_spacing) in;\n\n";
3372 
3373 	declareBufferBlocks(src, shaderSpec);
3374 
3375 	src << "void main (void)\n{\n"
3376 		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3377 		<< "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3378 
3379 	generateExecBufferIo(src, shaderSpec, "invocationId");
3380 
3381 	src	<< "}\n";
3382 
3383 	return src.str();
3384 }
3385 
// Registers the four pipeline stages: passthrough vertex and control shaders,
// the evaluation shader wrapping the shader-under-test and an empty fragment
// shader. Names must match the lookups in renderTess().
void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3393 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3394 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3395 {
3396 	const int	patchSize		= 2;
3397 	const int	alignedValues	= deAlign32(numValues, patchSize);
3398 
3399 	// Initialize buffers with aligned value count to make room for padding
3400 	initBuffers(alignedValues);
3401 
3402 	// Setup input buffer & copy data
3403 	uploadInputBuffer(inputs, numValues, false);
3404 
3405 	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3406 
3407 	// Read back data
3408 	readOutputBuffer(outputs, numValues);
3409 }
3410 
3411 } // anonymous
3412 
3413 // ShaderExecutor
3414 
// Virtual destructor anchor for the abstract executor interface.
ShaderExecutor::~ShaderExecutor (void)
{
}
3418 
areInputs16Bit(void) const3419 bool ShaderExecutor::areInputs16Bit (void) const
3420 {
3421 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3422 	{
3423 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3424 			return true;
3425 	}
3426 	return false;
3427 }
3428 
areOutputs16Bit(void) const3429 bool ShaderExecutor::areOutputs16Bit (void) const
3430 {
3431 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3432 	{
3433 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3434 			return true;
3435 	}
3436 	return false;
3437 }
3438 
isOutput16Bit(const size_t ndx) const3439 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3440 {
3441 	if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3442 		return true;
3443 	return false;
3444 }
3445 
areInputs64Bit(void) const3446 bool ShaderExecutor::areInputs64Bit (void) const
3447 {
3448 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3449 	{
3450 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3451 			return true;
3452 	}
3453 	return false;
3454 }
3455 
areOutputs64Bit(void) const3456 bool ShaderExecutor::areOutputs64Bit (void) const
3457 {
3458 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3459 	{
3460 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3461 			return true;
3462 	}
3463 	return false;
3464 }
3465 
isOutput64Bit(const size_t ndx) const3466 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3467 {
3468 	if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3469 		return true;
3470 	return false;
3471 }
3472 
3473 // Utilities
3474 
// Dispatches GLSL source generation to the executor class matching the given
// shader stage. Throws InternalError for stages without an executor (see
// executorSupported()).
void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					VertexShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		TessControlExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	TessEvaluationExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_GEOMETRY:					GeometryShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_FRAGMENT:					FragmentShaderExecutor::generateSources	(shaderSpec, dst);	break;
		case glu::SHADERTYPE_COMPUTE:					ComputeShaderExecutor::generateSources	(shaderSpec, dst);	break;
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3489 
// Factory for stage-specific executors. The caller owns the returned pointer.
// Throws InternalError for stages without an executor; stage coverage must
// stay in sync with generateSources() and executorSupported().
ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(context, shaderSpec, extraResourcesLayout);
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3504 
executorSupported(glu::ShaderType shaderType)3505 bool  executorSupported(glu::ShaderType shaderType)
3506 {
3507 	switch (shaderType)
3508 	{
3509 	case glu::SHADERTYPE_VERTEX:
3510 	case glu::SHADERTYPE_TESSELLATION_CONTROL:
3511 	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3512 	case glu::SHADERTYPE_GEOMETRY:
3513 	case glu::SHADERTYPE_FRAGMENT:
3514 	case glu::SHADERTYPE_COMPUTE:
3515 		return true;
3516 	default:
3517 		return false;
3518 	}
3519 }
3520 
checkSupportShader(Context & context,const glu::ShaderType shaderType)3521 void checkSupportShader(Context& context, const glu::ShaderType shaderType)
3522 {
3523 	if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
3524 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
3525 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
3526 	{
3527 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
3528 	}
3529 }
3530 
3531 
3532 } // shaderexecutor
3533 } // vkt
3534