• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 
38 #include "gluShaderUtil.hpp"
39 
40 #include "tcuVector.hpp"
41 #include "tcuTestLog.hpp"
42 #include "tcuTextureUtil.hpp"
43 
44 #include "deUniquePtr.hpp"
45 #include "deStringUtil.hpp"
46 #include "deSharedPtr.hpp"
47 #include "deFloat16.h"
48 
49 #include <map>
50 #include <sstream>
51 #include <iostream>
52 
53 using std::vector;
54 using namespace vk;
55 
56 namespace vkt
57 {
58 namespace shaderexecutor
59 {
60 namespace
61 {
62 
// Default render target dimensions. One value is evaluated per covered
// fragment, so a single draw handles up to WIDTH*HEIGHT values (see
// computeVertexPositions below).
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};

// Common typedefs

// Shared, reference-counted wrappers around Vulkan object handles and
// device memory allocations.
typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::SharedPtr<Allocation>			AllocationSp;

// Forward declaration: needed by the FragmentOutExecutor constructor before
// the definition further down in this file.
static VkFormat getAttributeFormat(const glu::DataType dataType);
77 
78 // Shader utilities
79 
getDefaultClearColor(void)80 static VkClearValue	getDefaultClearColor (void)
81 {
82 	return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
83 }
84 
// Builds a minimal #version 450 fragment shader that writes vec4(0.0) to a
// single color output; used when the fragment stage only needs to exist.
static std::string generateEmptyFragmentSource (void)
{
	std::ostringstream source;

	source << "#version 450\n"
		   << "layout(location=0) out highp vec4 o_color;\n"
		   << "void main (void)\n{\n"
		   << "\to_color = vec4(0.0);\n"
		   << "}\n";

	return source.str();
}
98 
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)99 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
100 {
101 	for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
102 	{
103 		if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
104 		{
105 			if(glu::isDataTypeVector(symIter->varType.getBasicType()))
106 			{
107 				for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
108 				{
109 					src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
110 				}
111 			}
112 			else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
113 			{
114 				int maxRow = 0;
115 				int maxCol = 0;
116 				switch (symIter->varType.getBasicType())
117 				{
118 				case glu::TYPE_FLOAT_MAT2:
119 					maxRow = maxCol = 2;
120 					break;
121 				case glu::TYPE_FLOAT_MAT2X3:
122 					maxRow = 2;
123 					maxCol = 3;
124 					break;
125 				case glu::TYPE_FLOAT_MAT2X4:
126 					maxRow = 2;
127 					maxCol = 4;
128 					break;
129 				case glu::TYPE_FLOAT_MAT3X2:
130 					maxRow = 3;
131 					maxCol = 2;
132 					break;
133 				case glu::TYPE_FLOAT_MAT3:
134 					maxRow = maxCol = 3;
135 					break;
136 				case glu::TYPE_FLOAT_MAT3X4:
137 					maxRow = 3;
138 					maxCol = 4;
139 					break;
140 				case glu::TYPE_FLOAT_MAT4X2:
141 					maxRow = 4;
142 					maxCol = 2;
143 					break;
144 				case glu::TYPE_FLOAT_MAT4X3:
145 					maxRow = 4;
146 					maxCol = 3;
147 					break;
148 				case glu::TYPE_FLOAT_MAT4:
149 					maxRow = maxCol = 4;
150 					break;
151 				default:
152 					DE_ASSERT(false);
153 					break;
154 				}
155 
156 				for(int i = 0; i < maxRow; i++)
157 				for(int j = 0; j < maxCol; j++)
158 				{
159 					src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
160 				}
161 			}
162 			else
163 			{
164 					src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
165 			}
166 		}
167 	}
168 }
169 
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)170 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
171 {
172 	std::ostringstream	src;
173 	int					location	= 0;
174 
175 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
176 
177 	if (!shaderSpec.globalDeclarations.empty())
178 		src << shaderSpec.globalDeclarations << "\n";
179 
180 	src << "layout(location = " << location << ") in highp vec4 a_position;\n";
181 
182 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
183 	{
184 		location++;
185 		src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
186 			<< "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
187 	}
188 
189 	src << "\nvoid main (void)\n{\n"
190 		<< "	gl_Position = a_position;\n"
191 		<< "	gl_PointSize = 1.0;\n";
192 
193 	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
194 		src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
195 
196 	src << "}\n";
197 
198 	return src.str();
199 }
200 
// Builds the vertex shader for the vertex-stage executor. Inputs arrive as
// vertex attributes (locations 1..N, location 0 is a_position), the user
// operation from shaderSpec.source runs on local copies, and results go to
// flat outputs at locations 0..N-1. Bool outputs are converted to highp
// ints (bools are not valid interface types). With
// shaderSpec.packFloat16Bit set, float inputs/outputs are computed in
// float16 types and re-packed into 32-bit "packed_" temporaries via
// packFloat16Bit() before being written out.
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(location = 0) in highp vec4 a_position;\n";

	// Attribute inputs start at location 1; a_position holds location 0.
	int			locationNumber	= 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src <<  "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	// Flat outputs start back at location 0.
	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Bools travel across the interface as same-width highp ints.
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = a_position;\n"
		<< "	gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// Convert the 32-bit attribute value to the float16 local type.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// float16 result variable plus a 32-bit "packed_" mirror that is
			// filled by packFloat16Bit() and actually written to the output.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				// Cast bool results to the int interface type declared above.
				const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
306 
// Mapping between fragment shader output symbols and color attachment
// locations (built by computeFragmentOutputLayout).
struct FragmentOutputLayout
{
	std::vector<const Symbol*>		locationSymbols;		//!< Owning symbol for each location, one entry per location
	std::map<std::string, int>		locationMap;			//!< Map from symbol name to start location
};
312 
// Declares the fragment shader color outputs for every symbol in
// shaderSpec.outputs at the locations given by outLocationMap:
//  - float/vec outputs become uints when useIntOutputs is set (values are
//    then written with floatBitsToUint by generateFragShaderOutAssign),
//  - bool/bvec outputs are declared as highp ints (bools are not valid
//    interface types),
//  - matrix outputs are split into one uvec per column ("name_0",
//    "name_1", ...) at consecutive locations.
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol&				output		= shaderSpec.outputs[outNdx];
		const int					location	= de::lookup(outLocationMap, output.name);
		const std::string			outVarName	= outputPrefix + output.name;
		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Redeclare float outputs as uints of the same component count.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Redeclare bool outputs as ints of the same component count.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One uvec output per matrix column, at consecutive locations.
			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name				= outVarName + "_" + de::toString(vecNdx);
				decl.layout.location	= location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
361 
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)362 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
363 {
364 	if (isInput16Bit)
365 		packFloat16Bit(src, shaderSpec.outputs);
366 
367 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
368 	{
369 		const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
370 
371 		if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
372 			src << "	o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
373 		else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
374 		{
375 			const int	numVecs		= glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
376 
377 			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
378 				if (useIntOutputs)
379 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
380 				else
381 					src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
382 		}
383 		else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
384 		{
385 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
386 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
387 
388 			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
389 		}
390 		else
391 			src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
392 	}
393 }
394 
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)395 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
396 {
397 	std::ostringstream	src;
398 
399 	src <<"#version 450\n";
400 
401 	if (!shaderSpec.globalDeclarations.empty())
402 		src << shaderSpec.globalDeclarations << "\n";
403 
404 	int locationNumber = 0;
405 	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
406 	{
407 		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
408 		{
409 			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
410 			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
411 			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);
412 
413 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
414 		}
415 		else
416 			src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
417 	}
418 
419 	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
420 
421 	src << "\nvoid main (void)\n{\n";
422 
423 	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
424 
425 	src << "}\n";
426 
427 	return src.str();
428 }
429 
// Builds the geometry shader for the geometry-stage executor: consumes one
// point, copies its flat inputs into local variables, runs the operation
// from shaderSpec.source, writes results to flat outputs (bools converted
// to highp ints) and emits exactly one point. gl_PointSize is forwarded
// only when pointSizeSupported is true.
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream	src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLSL ES 3.1 needs the extension; later versions have it in core.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Inputs are arrays (one element per input-primitive vertex, here 1).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Bools travel across the interface as same-width highp ints.
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType		intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Cast bool results to the int interface type declared above.
			const int				vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType		intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << "	EmitVertex();\n"
		<< "	EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
513 
// Builds the fragment shader for the fragment-stage executor: flat inputs
// carry the values, the operation from shaderSpec.source runs on local
// copies, and results are written to the color outputs declared by
// generateFragShaderOutputDecl(). With shaderSpec.packFloat16Bit set,
// float values are computed in float16 types and re-packed into 32-bit
// "packed_" temporaries by generateFragShaderOutAssign().
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	int			locationNumber	= 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// Convert the 32-bit interface value to the float16 local type.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// float16 result variable plus a 32-bit "packed_" mirror that is
			// filled by packFloat16Bit() and actually written to the output.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
572 
573 // FragmentOutExecutor
574 
// Executor that runs a ShaderSpec through the graphics pipeline; results
// are captured from fragment shader color outputs, whose layout is
// computed by computeFragmentOutputLayout(). Stage-specific executors
// derive from this class.
class FragmentOutExecutor : public ShaderExecutor
{
public:
														FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual												~FragmentOutExecutor	(void);

	// Executes the shader for numValues invocations; 'inputs'/'outputs'
	// hold per-symbol data arrays (see the ShaderExecutor interface).
	virtual void										execute					(int					numValues,
																				 const void* const*		inputs,
																				 void* const*			outputs,
																				 VkDescriptorSet		extraResources);

protected:
	const glu::ShaderType								m_shaderType;			// Shader stage under test
	const FragmentOutputLayout							m_outputLayout;			// Output symbol -> attachment location mapping

private:
	// Sets up vertex input state for the given input values — presumably one
	// attribute per input symbol via addAttribute(); body defined elsewhere.
	void												bindAttributes			(int					numValues,
																				 const void* const*		inputs);

	// Records a binding/attribute description pair and uploads 'dataPtr'
	// into a newly created host-visible vertex buffer.
	void												addAttribute			(deUint32				bindingLocation,
																				 VkFormat				format,
																				 deUint32				sizePerElement,
																				 deUint32				count,
																				 const void*			dataPtr);
	// reinit render data members
	virtual void										clearRenderData			(void);

	const VkDescriptorSetLayout							m_extraResourcesLayout;	// Not owned; layout for caller-provided resources

	std::vector<VkVertexInputBindingDescription>		m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>		m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>								m_vertexBuffers;		// Owned via shared pointers
	std::vector<AllocationSp>							m_vertexBufferAllocs;
};
609 
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)610 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
611 {
612 	FragmentOutputLayout	ret;
613 	int						location	= 0;
614 
615 	for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
616 	{
617 		const int	numLocations	= glu::getDataTypeNumLocations(it->varType.getBasicType());
618 
619 		TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
620 		de::insert(ret.locationMap, it->name, location);
621 		location += numLocations;
622 
623 		for (int ndx = 0; ndx < numLocations; ++ndx)
624 			ret.locationSymbols.push_back(&*it);
625 	}
626 
627 	return ret;
628 }
629 
// Computes the output-symbol -> attachment-location layout up front, then
// verifies that every input symbol's attribute format is usable as a
// vertex buffer format on this device; throws NotSupportedError otherwise.
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice = m_context.getPhysicalDevice();
	const InstanceInterface&	vki = m_context.getInstanceInterface();

	// Input attributes
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol = m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType = symbol.varType.getBasicType();
		const VkFormat				format = getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
650 
FragmentOutExecutor::~FragmentOutExecutor (void)
{
	// Nothing to release explicitly: buffers and allocations are held by
	// smart-pointer members (VkBufferSp / AllocationSp).
}
654 
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)655 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
656 {
657 	std::vector<tcu::Vec2> positions(numValues);
658 	for (int valNdx = 0; valNdx < numValues; valNdx++)
659 	{
660 		const int		ix		= valNdx % renderSize.x();
661 		const int		iy		= valNdx / renderSize.x();
662 		const float		fx		= -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
663 		const float		fy		= -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
664 
665 		positions[valNdx] = tcu::Vec2(fx, fy);
666 	}
667 
668 	return positions;
669 }
670 
getRenderbufferFormatForOutput(const glu::VarType & outputType,bool useIntOutputs)671 static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
672 {
673 	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
674 	{
675 		tcu::TextureFormat::R,
676 		tcu::TextureFormat::RG,
677 		tcu::TextureFormat::RGBA,	// No RGB variants available.
678 		tcu::TextureFormat::RGBA
679 	};
680 
681 	const glu::DataType					basicType		= outputType.getBasicType();
682 	const int							numComps		= glu::getDataTypeNumComponents(basicType);
683 	tcu::TextureFormat::ChannelType		channelType;
684 
685 	switch (glu::getDataTypeScalarType(basicType))
686 	{
687 		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;														break;
688 		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;															break;
689 		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;															break;
690 		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;			break;
691 		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;		break;
692 		default:
693 			throw tcu::InternalError("Invalid output type");
694 	}
695 
696 	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
697 
698 	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
699 }
700 
getAttributeFormat(const glu::DataType dataType)701 static VkFormat getAttributeFormat (const glu::DataType dataType)
702 {
703 	switch (dataType)
704 	{
705 		case glu::TYPE_FLOAT16:			return VK_FORMAT_R16_SFLOAT;
706 		case glu::TYPE_FLOAT16_VEC2:	return VK_FORMAT_R16G16_SFLOAT;
707 		case glu::TYPE_FLOAT16_VEC3:	return VK_FORMAT_R16G16B16_SFLOAT;
708 		case glu::TYPE_FLOAT16_VEC4:	return VK_FORMAT_R16G16B16A16_SFLOAT;
709 
710 		case glu::TYPE_FLOAT:			return VK_FORMAT_R32_SFLOAT;
711 		case glu::TYPE_FLOAT_VEC2:		return VK_FORMAT_R32G32_SFLOAT;
712 		case glu::TYPE_FLOAT_VEC3:		return VK_FORMAT_R32G32B32_SFLOAT;
713 		case glu::TYPE_FLOAT_VEC4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
714 
715 		case glu::TYPE_INT:				return VK_FORMAT_R32_SINT;
716 		case glu::TYPE_INT_VEC2:		return VK_FORMAT_R32G32_SINT;
717 		case glu::TYPE_INT_VEC3:		return VK_FORMAT_R32G32B32_SINT;
718 		case glu::TYPE_INT_VEC4:		return VK_FORMAT_R32G32B32A32_SINT;
719 
720 		case glu::TYPE_UINT:			return VK_FORMAT_R32_UINT;
721 		case glu::TYPE_UINT_VEC2:		return VK_FORMAT_R32G32_UINT;
722 		case glu::TYPE_UINT_VEC3:		return VK_FORMAT_R32G32B32_UINT;
723 		case glu::TYPE_UINT_VEC4:		return VK_FORMAT_R32G32B32A32_UINT;
724 
725 		case glu::TYPE_FLOAT_MAT2:		return VK_FORMAT_R32G32_SFLOAT;
726 		case glu::TYPE_FLOAT_MAT2X3:	return VK_FORMAT_R32G32B32_SFLOAT;
727 		case glu::TYPE_FLOAT_MAT2X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
728 		case glu::TYPE_FLOAT_MAT3X2:	return VK_FORMAT_R32G32_SFLOAT;
729 		case glu::TYPE_FLOAT_MAT3:		return VK_FORMAT_R32G32B32_SFLOAT;
730 		case glu::TYPE_FLOAT_MAT3X4:	return VK_FORMAT_R32G32B32A32_SFLOAT;
731 		case glu::TYPE_FLOAT_MAT4X2:	return VK_FORMAT_R32G32_SFLOAT;
732 		case glu::TYPE_FLOAT_MAT4X3:	return VK_FORMAT_R32G32B32_SFLOAT;
733 		case glu::TYPE_FLOAT_MAT4:		return VK_FORMAT_R32G32B32A32_SFLOAT;
734 		default:
735 			DE_ASSERT(false);
736 			return VK_FORMAT_UNDEFINED;
737 	}
738 }
739 
addAttribute(deUint32 bindingLocation,VkFormat format,deUint32 sizePerElement,deUint32 count,const void * dataPtr)740 void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
741 {
742 	// Add binding specification
743 	const deUint32							binding = (deUint32)m_vertexBindingDescriptions.size();
744 	const VkVertexInputBindingDescription	bindingDescription =
745 	{
746 		binding,
747 		sizePerElement,
748 		VK_VERTEX_INPUT_RATE_VERTEX
749 	};
750 
751 	m_vertexBindingDescriptions.push_back(bindingDescription);
752 
753 	// Add location and format specification
754 	const VkVertexInputAttributeDescription attributeDescription =
755 	{
756 		bindingLocation,			// deUint32	location;
757 		binding,					// deUint32	binding;
758 		format,						// VkFormat	format;
759 		0u,							// deUint32	offsetInBytes;
760 	};
761 
762 	m_vertexAttributeDescriptions.push_back(attributeDescription);
763 
764 	// Upload data to buffer
765 	const VkDevice				vkDevice			= m_context.getDevice();
766 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
767 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
768 
769 	const VkDeviceSize			inputSize			= sizePerElement * count;
770 	const VkBufferCreateInfo	vertexBufferParams	=
771 	{
772 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
773 		DE_NULL,									// const void*			pNext;
774 		0u,											// VkBufferCreateFlags	flags;
775 		inputSize,									// VkDeviceSize			size;
776 		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,			// VkBufferUsageFlags	usage;
777 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
778 		1u,											// deUint32				queueFamilyCount;
779 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
780 	};
781 
782 	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
783 	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
784 
785 	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
786 
787 	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
788 	flushAlloc(vk, vkDevice, *alloc);
789 
790 	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
791 	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
792 }
793 
bindAttributes(int numValues,const void * const * inputs)794 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
795 {
796 	// Input attributes
797 	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
798 	{
799 		const Symbol&		symbol			= m_shaderSpec.inputs[inputNdx];
800 		const void*			ptr				= inputs[inputNdx];
801 		const glu::DataType	basicType		= symbol.varType.getBasicType();
802 		const int			vecSize			= glu::getDataTypeScalarSize(basicType);
803 		const VkFormat		format			= getAttributeFormat(basicType);
804 		int					elementSize		= 0;
805 		int					numAttrsToAdd	= 1;
806 
807 		if (glu::isDataTypeDoubleOrDVec(basicType))
808 			elementSize = sizeof(double);
809 		if (glu::isDataTypeFloatOrVec(basicType))
810 			elementSize = sizeof(float);
811 		else if (glu::isDataTypeFloat16OrVec(basicType))
812 			elementSize = sizeof(deUint16);
813 		else if (glu::isDataTypeIntOrIVec(basicType))
814 			elementSize = sizeof(int);
815 		else if (glu::isDataTypeUintOrUVec(basicType))
816 			elementSize = sizeof(deUint32);
817 		else if (glu::isDataTypeMatrix(basicType))
818 		{
819 			int		numRows	= glu::getDataTypeMatrixNumRows(basicType);
820 			int		numCols	= glu::getDataTypeMatrixNumColumns(basicType);
821 
822 			elementSize = numRows * numCols * (int)sizeof(float);
823 			numAttrsToAdd = numCols;
824 		}
825 		else
826 			DE_ASSERT(false);
827 
828 		// add attributes, in case of matrix every column is binded as an attribute
829 		for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
830 		{
831 			addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
832 		}
833 	}
834 }
835 
// Drops all per-execution vertex input state: the binding/attribute
// descriptions and the vertex buffers (with their backing memory) created by
// addAttribute(). Buffers are cleared before their allocations so each
// VkBuffer is destroyed before the memory it was bound to is freed.
void FragmentOutExecutor::clearRenderData (void)
{
	m_vertexBindingDescriptions.clear();
	m_vertexAttributeDescriptions.clear();
	m_vertexBuffers.clear();
	m_vertexBufferAllocs.clear();
}
843 
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)844 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
845 {
846 	const VkDescriptorSetLayoutCreateInfo	createInfo	=
847 	{
848 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
849 		DE_NULL,
850 		(VkDescriptorSetLayoutCreateFlags)0,
851 		0u,
852 		DE_NULL,
853 	};
854 	return createDescriptorSetLayout(vkd, device, &createInfo);
855 }
856 
createDummyDescriptorPool(const DeviceInterface & vkd,VkDevice device)857 static Move<VkDescriptorPool> createDummyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
858 {
859 	const VkDescriptorPoolSize			dummySize	=
860 	{
861 		VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
862 		1u,
863 	};
864 	const VkDescriptorPoolCreateInfo	createInfo	=
865 	{
866 		VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
867 		DE_NULL,
868 		(VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
869 		1u,
870 		1u,
871 		&dummySize
872 	};
873 	return createDescriptorPool(vkd, device, &createInfo);
874 }
875 
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)876 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
877 {
878 	const VkDescriptorSetAllocateInfo	allocInfo	=
879 	{
880 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
881 		DE_NULL,
882 		pool,
883 		1u,
884 		&layout,
885 	};
886 	return allocateDescriptorSet(vkd, device, &allocInfo);
887 }
888 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)889 void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
890 {
891 	const VkDevice										vkDevice				= m_context.getDevice();
892 	const DeviceInterface&								vk						= m_context.getDeviceInterface();
893 	const VkQueue										queue					= m_context.getUniversalQueue();
894 	const deUint32										queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();
895 	Allocator&											memAlloc				= m_context.getDefaultAllocator();
896 
897 	const deUint32										renderSizeX				= de::min(static_cast<deUint32>(128), (deUint32)numValues);
898 	const deUint32										renderSizeY				= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
899 	const tcu::UVec2									renderSize				(renderSizeX, renderSizeY);
900 	std::vector<tcu::Vec2>								positions;
901 
902 	const bool											useGeometryShader		= m_shaderType == glu::SHADERTYPE_GEOMETRY;
903 
904 	std::vector<VkImageSp>								colorImages;
905 	std::vector<VkImageMemoryBarrier>					colorImagePreRenderBarriers;
906 	std::vector<VkImageMemoryBarrier>					colorImagePostRenderBarriers;
907 	std::vector<AllocationSp>							colorImageAllocs;
908 	std::vector<VkAttachmentDescription>				attachments;
909 	std::vector<VkClearValue>							attachmentClearValues;
910 	std::vector<VkImageViewSp>							colorImageViews;
911 
912 	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
913 	std::vector<VkAttachmentReference>					colorAttachmentReferences;
914 
915 	Move<VkRenderPass>									renderPass;
916 	Move<VkFramebuffer>									framebuffer;
917 	Move<VkPipelineLayout>								pipelineLayout;
918 	Move<VkPipeline>									graphicsPipeline;
919 
920 	Move<VkShaderModule>								vertexShaderModule;
921 	Move<VkShaderModule>								geometryShaderModule;
922 	Move<VkShaderModule>								fragmentShaderModule;
923 
924 	Move<VkCommandPool>									cmdPool;
925 	Move<VkCommandBuffer>								cmdBuffer;
926 
927 	Unique<VkDescriptorSetLayout>						emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
928 	Unique<VkDescriptorPool>							dummyDescriptorPool			(createDummyDescriptorPool(vk, vkDevice));
929 	Unique<VkDescriptorSet>								emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *dummyDescriptorPool, *emptyDescriptorSetLayout));
930 
931 	clearRenderData();
932 
933 	// Compute positions - 1px points are used to drive fragment shading.
934 	positions = computeVertexPositions(numValues, renderSize.cast<int>());
935 
936 	// Bind attributes
937 	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
938 	bindAttributes(numValues, inputs);
939 
940 	// Create color images
941 	{
942 		const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
943 		{
944 			VK_FALSE,																	// VkBool32						blendEnable;
945 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcColorBlendFactor;
946 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				dstColorBlendFactor;
947 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpColor;
948 			VK_BLEND_FACTOR_ONE,														// VkBlendFactor				srcAlphaBlendFactor;
949 			VK_BLEND_FACTOR_ZERO,														// VkBlendFactor				destAlphaBlendFactor;
950 			VK_BLEND_OP_ADD,															// VkBlendOp					blendOpAlpha;
951 			(VK_COLOR_COMPONENT_R_BIT |
952 			 VK_COLOR_COMPONENT_G_BIT |
953 			 VK_COLOR_COMPONENT_B_BIT |
954 			 VK_COLOR_COMPONENT_A_BIT)													// VkColorComponentFlags		colorWriteMask;
955 		};
956 
957 		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
958 		{
959 			const bool		isDouble	= glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
960 			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
961 			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
962 			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
963 			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
964 			const VkFormat	colorFormat = (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));
965 
966 			{
967 				const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
968 				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
969 					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
970 			}
971 
972 			const VkImageCreateInfo	 colorImageParams =
973 			{
974 				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType				sType;
975 				DE_NULL,																	// const void*					pNext;
976 				0u,																			// VkImageCreateFlags			flags;
977 				VK_IMAGE_TYPE_2D,															// VkImageType					imageType;
978 				colorFormat,																// VkFormat						format;
979 				{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D					extent;
980 				1u,																			// deUint32						mipLevels;
981 				1u,																			// deUint32						arraySize;
982 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
983 				VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling				tiling;
984 				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags			usage;
985 				VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode				sharingMode;
986 				1u,																			// deUint32						queueFamilyCount;
987 				&queueFamilyIndex,															// const deUint32*				pQueueFamilyIndices;
988 				VK_IMAGE_LAYOUT_UNDEFINED,													// VkImageLayout				initialLayout;
989 			};
990 
991 			const VkAttachmentDescription colorAttachmentDescription =
992 			{
993 				0u,																			// VkAttachmentDescriptorFlags	flags;
994 				colorFormat,																// VkFormat						format;
995 				VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits		samples;
996 				VK_ATTACHMENT_LOAD_OP_CLEAR,												// VkAttachmentLoadOp			loadOp;
997 				VK_ATTACHMENT_STORE_OP_STORE,												// VkAttachmentStoreOp			storeOp;
998 				VK_ATTACHMENT_LOAD_OP_DONT_CARE,											// VkAttachmentLoadOp			stencilLoadOp;
999 				VK_ATTACHMENT_STORE_OP_DONT_CARE,											// VkAttachmentStoreOp			stencilStoreOp;
1000 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				initialLayout;
1001 				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,									// VkImageLayout				finalLayout;
1002 			};
1003 
1004 			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1005 			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
1006 			attachmentClearValues.push_back(getDefaultClearColor());
1007 
1008 			// Allocate and bind color image memory
1009 			{
1010 				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
1011 				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
1012 				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1013 
1014 				attachments.push_back(colorAttachmentDescription);
1015 				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1016 
1017 				const VkAttachmentReference colorAttachmentReference =
1018 				{
1019 					(deUint32) (colorImages.size() - 1),			//	deUint32		attachment;
1020 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL		//	VkImageLayout	layout;
1021 				};
1022 
1023 				colorAttachmentReferences.push_back(colorAttachmentReference);
1024 			}
1025 
1026 			// Create color attachment view
1027 			{
1028 				const VkImageViewCreateInfo colorImageViewParams =
1029 				{
1030 					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
1031 					DE_NULL,											// const void*				pNext;
1032 					0u,													// VkImageViewCreateFlags	flags;
1033 					colorImages.back().get()->get(),					// VkImage					image;
1034 					VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
1035 					colorFormat,										// VkFormat					format;
1036 					{
1037 						VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
1038 						VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
1039 						VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
1040 						VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
1041 					},													// VkComponentMapping		components;
1042 					{
1043 						VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
1044 						0u,												// deUint32					baseMipLevel;
1045 						1u,												// deUint32					mipLevels;
1046 						0u,												// deUint32					baseArraySlice;
1047 						1u												// deUint32					arraySize;
1048 					}													// VkImageSubresourceRange	subresourceRange;
1049 				};
1050 
1051 				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1052 				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));
1053 
1054 				const VkImageMemoryBarrier	colorImagePreRenderBarrier =
1055 				{
1056 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1057 					DE_NULL,												// pNext
1058 					0u,														// srcAccessMask
1059 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1060 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// dstAccessMask
1061 					VK_IMAGE_LAYOUT_UNDEFINED,								// oldLayout
1062 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// newLayout
1063 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1064 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1065 					colorImages.back().get()->get(),						// image
1066 					{
1067 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1068 						0u,														// baseMipLevel
1069 						1u,														// levelCount
1070 						0u,														// baseArrayLayer
1071 						1u,														// layerCount
1072 					}														// subresourceRange
1073 				};
1074 				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1075 
1076 				const VkImageMemoryBarrier	colorImagePostRenderBarrier =
1077 				{
1078 					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,					// sType
1079 					DE_NULL,												// pNext
1080 					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
1081 					VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),					// srcAccessMask
1082 					VK_ACCESS_TRANSFER_READ_BIT,							// dstAccessMask
1083 					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,				// oldLayout
1084 					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,					// newLayout
1085 					VK_QUEUE_FAMILY_IGNORED,								// srcQueueFamilyIndex
1086 					VK_QUEUE_FAMILY_IGNORED,								// dstQueueFamilyIndex
1087 					colorImages.back().get()->get(),						// image
1088 					{
1089 						VK_IMAGE_ASPECT_COLOR_BIT,								// aspectMask
1090 						0u,														// baseMipLevel
1091 						1u,														// levelCount
1092 						0u,														// baseArrayLayer
1093 						1u,														// layerCount
1094 					}														// subresourceRange
1095 				};
1096 				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1097 			}
1098 		}
1099 	}
1100 
1101 	// Create render pass
1102 	{
1103 		const VkSubpassDescription subpassDescription =
1104 		{
1105 			0u,													// VkSubpassDescriptionFlags	flags;
1106 			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
1107 			0u,													// deUint32						inputCount;
1108 			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
1109 			(deUint32)colorImages.size(),						// deUint32						colorCount;
1110 			&colorAttachmentReferences[0],						// const VkAttachmentReference*	colorAttachments;
1111 			DE_NULL,											// const VkAttachmentReference*	resolveAttachments;
1112 			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
1113 			0u,													// deUint32						preserveCount;
1114 			DE_NULL												// const VkAttachmentReference*	pPreserveAttachments;
1115 		};
1116 
1117 		const VkRenderPassCreateInfo renderPassParams =
1118 		{
1119 			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
1120 			DE_NULL,											// const void*						pNext;
1121 			(VkRenderPassCreateFlags)0,							// VkRenderPassCreateFlags			flags;
1122 			(deUint32)attachments.size(),						// deUint32							attachmentCount;
1123 			&attachments[0],									// const VkAttachmentDescription*	pAttachments;
1124 			1u,													// deUint32							subpassCount;
1125 			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
1126 			0u,													// deUint32							dependencyCount;
1127 			DE_NULL												// const VkSubpassDependency*		pDependencies;
1128 		};
1129 
1130 		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1131 	}
1132 
1133 	// Create framebuffer
1134 	{
1135 		std::vector<VkImageView> views(colorImageViews.size());
1136 		for (size_t i = 0; i < colorImageViews.size(); i++)
1137 		{
1138 			views[i] = colorImageViews[i].get()->get();
1139 		}
1140 
1141 		const VkFramebufferCreateInfo framebufferParams =
1142 		{
1143 			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
1144 			DE_NULL,											// const void*					pNext;
1145 			0u,													// VkFramebufferCreateFlags		flags;
1146 			*renderPass,										// VkRenderPass					renderPass;
1147 			(deUint32)views.size(),								// deUint32						attachmentCount;
1148 			&views[0],											// const VkImageView*			pAttachments;
1149 			(deUint32)renderSize.x(),							// deUint32						width;
1150 			(deUint32)renderSize.y(),							// deUint32						height;
1151 			1u													// deUint32						layers;
1152 		};
1153 
1154 		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1155 	}
1156 
1157 	// Create pipeline layout
1158 	{
1159 		const VkDescriptorSetLayout			setLayouts[]			=
1160 		{
1161 			*emptyDescriptorSetLayout,
1162 			m_extraResourcesLayout
1163 		};
1164 		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
1165 		{
1166 			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
1167 			DE_NULL,											// const void*					pNext;
1168 			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
1169 			(m_extraResourcesLayout != 0 ? 2u : 0u),			// deUint32						descriptorSetCount;
1170 			setLayouts,											// const VkDescriptorSetLayout*	pSetLayouts;
1171 			0u,													// deUint32						pushConstantRangeCount;
1172 			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
1173 		};
1174 
1175 		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1176 	}
1177 
1178 	// Create shaders
1179 	{
1180 		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1181 		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1182 
1183 		if (useGeometryShader)
1184 		{
1185 			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1186 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1187 			else
1188 				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1189 		}
1190 	}
1191 
1192 	// Create pipeline
1193 	{
1194 		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
1195 		{
1196 			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
1197 			DE_NULL,													// const void*									pNext;
1198 			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags		flags;
1199 			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32										bindingCount;
1200 			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
1201 			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32										attributeCount;
1202 			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*		pvertexAttributeDescriptions;
1203 		};
1204 
1205 		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
1206 		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));
1207 
1208 		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
1209 		{
1210 			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,		// VkStructureType								sType;
1211 			DE_NULL,														// const void*									pNext;
1212 			(VkPipelineColorBlendStateCreateFlags)0,						// VkPipelineColorBlendStateCreateFlags			flags;
1213 			VK_FALSE,														// VkBool32										logicOpEnable;
1214 			VK_LOGIC_OP_COPY,												// VkLogicOp									logicOp;
1215 			(deUint32)colorBlendAttachmentStates.size(),					// deUint32										attachmentCount;
1216 			&colorBlendAttachmentStates[0],									// const VkPipelineColorBlendAttachmentState*	pAttachments;
1217 			{ 0.0f, 0.0f, 0.0f, 0.0f }										// float										blendConst[4];
1218 		};
1219 
1220 		graphicsPipeline = makeGraphicsPipeline(vk,														// const DeviceInterface&                        vk
1221 												vkDevice,												// const VkDevice                                device
1222 												*pipelineLayout,										// const VkPipelineLayout                        pipelineLayout
1223 												*vertexShaderModule,									// const VkShaderModule                          vertexShaderModule
1224 												DE_NULL,												// const VkShaderModule                          tessellationControlShaderModule
1225 												DE_NULL,												// const VkShaderModule                          tessellationEvalShaderModule
1226 												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule                          geometryShaderModule
1227 												*fragmentShaderModule,									// const VkShaderModule                          fragmentShaderModule
1228 												*renderPass,											// const VkRenderPass                            renderPass
1229 												viewports,												// const std::vector<VkViewport>&                viewports
1230 												scissors,												// const std::vector<VkRect2D>&                  scissors
1231 												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,						// const VkPrimitiveTopology                     topology
1232 												0u,														// const deUint32                                subpass
1233 												0u,														// const deUint32                                patchControlPoints
1234 												&vertexInputStateParams,								// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1235 												DE_NULL,												// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1236 												DE_NULL,												// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1237 												DE_NULL,												// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1238 												&colorBlendStateParams);								// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1239 	}
1240 
1241 	// Create command pool
1242 	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1243 
1244 	// Create command buffer
1245 	{
1246 		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1247 
1248 		beginCommandBuffer(vk, *cmdBuffer);
1249 
1250 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
1251 							  0, (const VkMemoryBarrier*)DE_NULL,
1252 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1253 							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
1254 		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);
1255 
1256 		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1257 
1258 		if (m_extraResourcesLayout != 0)
1259 		{
1260 			DE_ASSERT(extraResources != 0);
1261 			const VkDescriptorSet	descriptorSets[]	= { *emptyDescriptorSet, extraResources };
1262 			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
1263 		}
1264 		else
1265 			DE_ASSERT(extraResources == 0);
1266 
1267 		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();
1268 
1269 		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1270 
1271 		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1272 		for (size_t i = 0; i < numberOfVertexAttributes; i++)
1273 		{
1274 			buffers[i] = m_vertexBuffers[i].get()->get();
1275 		}
1276 
1277 		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1278 		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);
1279 
1280 		endRenderPass(vk, *cmdBuffer);
1281 		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
1282 							  0, (const VkMemoryBarrier*)DE_NULL,
1283 							  0, (const VkBufferMemoryBarrier*)DE_NULL,
1284 							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);
1285 
1286 		endCommandBuffer(vk, *cmdBuffer);
1287 	}
1288 
1289 	// Execute Draw
1290 	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1291 
1292 	// Read back result and output
1293 	{
1294 		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
1295 		const VkBufferCreateInfo readImageBufferParams =
1296 		{
1297 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1298 			DE_NULL,									// const void*			pNext;
1299 			0u,											// VkBufferCreateFlags	flags;
1300 			imageSizeBytes,								// VkDeviceSize			size;
1301 			VK_BUFFER_USAGE_TRANSFER_DST_BIT,			// VkBufferUsageFlags	usage;
1302 			VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1303 			1u,											// deUint32				queueFamilyCount;
1304 			&queueFamilyIndex,							// const deUint32*		pQueueFamilyIndices;
1305 		};
1306 
1307 		// constants for image copy
1308 		Move<VkCommandPool>	copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1309 
1310 		const VkBufferImageCopy copyParams =
1311 		{
1312 			0u,											// VkDeviceSize			bufferOffset;
1313 			(deUint32)renderSize.x(),					// deUint32				bufferRowLength;
1314 			(deUint32)renderSize.y(),					// deUint32				bufferImageHeight;
1315 			{
1316 				VK_IMAGE_ASPECT_COLOR_BIT,				// VkImageAspect		aspect;
1317 				0u,										// deUint32				mipLevel;
1318 				0u,										// deUint32				arraySlice;
1319 				1u,										// deUint32				arraySize;
1320 			},											// VkImageSubresource	imageSubresource;
1321 			{ 0u, 0u, 0u },								// VkOffset3D			imageOffset;
1322 			{ renderSize.x(), renderSize.y(), 1u }		// VkExtent3D			imageExtent;
1323 		};
1324 
1325 		// Read back pixels.
1326 		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1327 		{
1328 			const Symbol&				output			= m_shaderSpec.outputs[outNdx];
1329 			const int					outSize			= output.varType.getScalarSize();
1330 			const int					outVecSize		= glu::getDataTypeNumComponents(output.varType.getBasicType());
1331 			const int					outNumLocs		= glu::getDataTypeNumLocations(output.varType.getBasicType());
1332 			const int					outLocation		= de::lookup(m_outputLayout.locationMap, output.name);
1333 
1334 			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1335 			{
1336 				tcu::TextureLevel			tmpBuf;
1337 				const tcu::TextureFormat	format = getRenderbufferFormatForOutput(output.varType, false);
1338 				const tcu::TextureFormat	readFormat (tcu::TextureFormat::RGBA, format.type);
1339 				const Unique<VkBuffer>		readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1340 				const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1341 
1342 				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));
1343 
1344 				// Copy image to buffer
1345 				{
1346 
1347 					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1348 
1349 					beginCommandBuffer(vk, *copyCmdBuffer);
1350 					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1351 
1352 					// Insert a barrier so data written by the transfer is available to the host
1353 					{
1354 						const VkBufferMemoryBarrier barrier =
1355 						{
1356 							VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
1357 							DE_NULL,									// const void*        pNext;
1358 							VK_ACCESS_TRANSFER_WRITE_BIT,				// VkAccessFlags      srcAccessMask;
1359 							VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
1360 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
1361 							VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
1362 							*readImageBuffer,							// VkBuffer           buffer;
1363 							0,											// VkDeviceSize       offset;
1364 							VK_WHOLE_SIZE,								// VkDeviceSize       size;
1365 						};
1366 
1367 						vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
1368 											0, (const VkMemoryBarrier*)DE_NULL,
1369 											1, &barrier,
1370 											0, (const VkImageMemoryBarrier*)DE_NULL);
1371 					}
1372 
1373 					endCommandBuffer(vk, *copyCmdBuffer);
1374 
1375 					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1376 				}
1377 
1378 				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1379 
1380 				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1381 
1382 				const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1383 				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());
1384 
1385 				tcu::copy(tmpBuf.getAccess(), resultAccess);
1386 
1387 				if (isOutput16Bit(static_cast<size_t>(outNdx)))
1388 				{
1389 					deUint16*	dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
1390 					if (outSize == 4 && outNumLocs == 1)
1391 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
1392 					else
1393 					{
1394 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1395 						{
1396 							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1397 							deUint16*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1398 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
1399 						}
1400 					}
1401 				}
1402 				else
1403 				{
1404 					deUint32*	dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
1405 					if (outSize == 4 && outNumLocs == 1)
1406 						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
1407 					else
1408 					{
1409 						for (int valNdx = 0; valNdx < numValues; valNdx++)
1410 						{
1411 							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1412 							deUint32*		dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1413 							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
1414 						}
1415 					}
1416 				}
1417 			}
1418 		}
1419 	}
1420 }
1421 
1422 // VertexShaderExecutor
1423 
// Executes the shader spec in the vertex stage; results are routed through a
// passthrough fragment shader and read back as color attachments (see
// FragmentOutExecutor).
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
								VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~VertexShaderExecutor	(void);

	// Adds the "vert" and "frag" GLSL sources for the given spec to dst.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
};
1432 
// Delegates to FragmentOutExecutor with the vertex shader stage selected.
VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}
1437 
// No resources beyond what the base class manages.
VertexShaderExecutor::~VertexShaderExecutor (void)
{
}
1441 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1442 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1443 {
1444 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1445 
1446 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1447 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1448 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1449 }
1450 
1451 // GeometryShaderExecutor
1452 
// Executes the shader spec in the geometry stage; a passthrough vertex shader
// feeds it and a passthrough fragment shader captures the results.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
								GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~GeometryShaderExecutor	(void);

	// Adds "vert", "geom", "geom_point_size" and "frag" GLSL sources to the collection.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1462 
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1463 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1464 	: FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1465 {
1466 	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1467 
1468 	if (!features.geometryShader)
1469 		TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1470 }
1471 
// No resources beyond what the base class manages.
GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}
1475 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1476 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1477 {
1478 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1479 
1480 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1481 
1482 	programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1483 	programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1484 
1485 	/* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1486 	programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1487 
1488 }
1489 
1490 // FragmentShaderExecutor
1491 
// Executes the shader spec directly in the fragment stage, fed by a
// passthrough vertex shader.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
								FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual						~FragmentShaderExecutor (void);

	// Adds the "vert" and "frag" GLSL sources for the given spec to the collection.
	static void					generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1501 
// Delegates to FragmentOutExecutor with the fragment shader stage selected.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1506 
// No resources beyond what the base class manages.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
1510 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1511 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1512 {
1513 	const FragmentOutputLayout	outputLayout	(computeFragmentOutputLayout(shaderSpec.outputs));
1514 
1515 	programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1516 	/* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1517 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1518 }
1519 
1520 // Shared utilities for compute and tess executors
1521 
getVecStd430ByteAlignment(glu::DataType type)1522 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1523 {
1524 	deUint32 baseSize;
1525 
1526 	switch (glu::getDataTypeScalarType(type))
1527 	{
1528 		case glu::TYPE_FLOAT16:	baseSize = 2u; break;
1529 		case glu::TYPE_DOUBLE:	baseSize = 8u; break;
1530 		default:				baseSize = 4u; break;
1531 	}
1532 
1533 	switch (glu::getDataTypeScalarSize(type))
1534 	{
1535 		case 1:		return baseSize;
1536 		case 2:		return baseSize * 2u;;
1537 		case 3:		// fallthrough.
1538 		case 4:		return baseSize * 4u;
1539 		default:
1540 			DE_ASSERT(false);
1541 			return 0u;
1542 	}
1543 }
1544 
// Base class for executors that exchange inputs/outputs through storage
// buffers (compute and tessellation executors) instead of vertex attributes
// and color attachments.
class BufferIoExecutor : public ShaderExecutor
{
public:
							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
	virtual					~BufferIoExecutor	(void);

protected:
	// Descriptor bindings used by the generated shaders for the two SSBOs.
	enum
	{
		INPUT_BUFFER_BINDING	= 0,
		OUTPUT_BUFFER_BINDING	= 1,
	};

	// Creates and binds host-visible input/output buffers sized for numValues elements.
	void					initBuffers			(int numValues);
	VkBuffer				getInputBuffer		(void) const		{ return *m_inputBuffer;					}
	VkBuffer				getOutputBuffer		(void) const		{ return *m_outputBuffer;					}
	// Per-element byte stride of the interleaved std430 input/output layouts.
	deUint32				getInputStride		(void) const		{ return getLayoutStride(m_inputLayout);	}
	deUint32				getOutputStride		(void) const		{ return getLayoutStride(m_outputLayout);	}

	// Copies user values into the input buffer (optionally packing 32-bit
	// floats as 16-bit values) and flushes it for device access.
	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	// Invalidates the output buffer and copies results back to user pointers.
	void					readOutputBuffer	(void* const* outputPtrs, int numValues);

	// GLSL generation helpers: declare the Inputs/Outputs structs and SSBO
	// blocks, and emit the per-invocation load/execute/store body.
	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer>			m_inputBuffer;
	Move<VkBuffer>			m_outputBuffer;

private:
	// std430 placement of one variable inside the per-element struct.
	struct VarLayout
	{
		deUint32		offset;			// Byte offset within one element.
		deUint32		stride;			// Total element size; identical for every entry.
		deUint32		matrixStride;	// Column stride for matrices, 0 otherwise.

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);

	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation>	m_inputAlloc;
	de::MovePtr<Allocation>	m_outputAlloc;

	vector<VarLayout>		m_inputLayout;
	vector<VarLayout>		m_outputLayout;
};
1596 
// Precomputes the std430 layouts for the spec's input and output symbols;
// buffer creation is deferred to initBuffers().
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1603 
// Buffers and allocations are released by their RAII wrappers.
BufferIoExecutor::~BufferIoExecutor (void)
{
}
1607 
getLayoutStride(const vector<VarLayout> & layout)1608 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1609 {
1610 	return layout.empty() ? 0 : layout[0].stride;
1611 }
1612 
// Computes std430 offsets for each symbol, laid out as one interleaved
// struct per element: members are placed in order with std430 alignment and
// every entry's stride is set to the aligned total struct size.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
{
	deUint32	maxAlignment	= 0;	// Largest member alignment seen; rounds out the struct size.
	deUint32	curOffset		= 0;	// Running byte offset within the element struct.

	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());

	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
	{
		const Symbol&		symbol		= symbols[varNdx];
		const glu::DataType	basicType	= symbol.varType.getBasicType();
		VarLayout&			layoutEntry	= (*layout)[varNdx];

		if (glu::isDataTypeScalarOrVector(basicType))
		{
			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
			// Component size: 8 bytes for doubles, 2 for 16-bit floats, 4 otherwise.
			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment	= de::max(maxAlignment, alignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= 0;	// Not a matrix.

			curOffset += size;
		}
		else if (glu::isDataTypeMatrix(basicType))
		{
			// Matrices are stored column-major; each column is aligned like a
			// standalone vector of the same component type (std430 rules).
			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType		vecType			= glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment	= de::max(maxAlignment, vecAlignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= vecAlignment;

			curOffset += vecAlignment*numVecs;
		}
		else
			DE_ASSERT(false);	// Arrays/structs are not supported by this executor.
	}

	{
		// Stride of every member = total struct size aligned to the largest
		// member alignment, so consecutive elements stay correctly aligned.
		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);

		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
			varIter->stride = totalSize;
	}
}
1666 
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1667 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1668 {
1669 	// Input struct
1670 	if (!spec.inputs.empty())
1671 	{
1672 		glu::StructType inputStruct("Inputs");
1673 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1674 			inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1675 		src << glu::declare(&inputStruct) << ";\n";
1676 	}
1677 
1678 	// Output struct
1679 	{
1680 		glu::StructType outputStruct("Outputs");
1681 		for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1682 			outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1683 		src << glu::declare(&outputStruct) << ";\n";
1684 	}
1685 
1686 	src << "\n";
1687 
1688 	if (!spec.inputs.empty())
1689 	{
1690 		src	<< "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1691 			<< "{\n"
1692 			<< "	Inputs inputs[];\n"
1693 			<< "};\n";
1694 	}
1695 
1696 	src	<< "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1697 		<< "{\n"
1698 		<< "	Outputs outputs[];\n"
1699 		<< "};\n"
1700 		<< "\n";
1701 }
1702 
// Emits the per-invocation GLSL body: load each input from the input SSBO
// into a local, declare output locals, paste the user-supplied operation
// source, then store results to the output SSBO. With spec.packFloat16Bit,
// float values use float16 local types and are written back through
// full-width "packed_" shadow variables filled in by packFloat16Bit().
void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
{
	std::string	tname;
	// Inputs: "T name = T(inputs[ndx].name);" — T is the float16 counterpart
	// type when 16-bit packing is requested, otherwise the declared type.
	for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if (f16BitTest)
		{
			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		}
		else
		{
			tname = glu::getDataTypeName(symIter->varType.getBasicType());
		}
		src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
	}

	// Output locals; 16-bit float outputs additionally get a "packed_" shadow
	// of the original (full-width) type that is what gets stored below.
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if (f16BitTest)
		{
			tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
		}
		else
		{
			tname = glu::getDataTypeName(symIter->varType.getBasicType());
		}
		src << "\t" << tname << " " << symIter->name << ";\n";
		if (f16BitTest)
		{
			const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
			src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
		}
	}

	src << "\n";

	// Paste the operation source, indenting each line to the function body.
	{
		std::istringstream	opSrc	(spec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Fill the "packed_" shadows from the float16 locals.
	if (spec.packFloat16Bit)
		packFloat16Bit (src, spec.outputs);

	src << "\n";
	// Store outputs; packed outputs are written from their shadow variables.
	for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
	{
		const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
		if(f16BitTest)
			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
		else
			src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
	}
}
1762 
// Writes numValues tightly-packed elements from srcBasePtr into the
// std430-interleaved buffer described by layout. With packFloat16Bit set,
// each 32-bit float component is converted to float16 (round toward zero)
// and stored in the low 16 bits of a zero-padded 32-bit slot.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType		basicType		= varType.getBasicType();
		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;	// Columns copied one at a time.
		const int				numComps		= scalarSize / numVecs;											// Components per column (or per vector).
		// Component size in the buffer: 8 for doubles, 2 for float16, 4 otherwise.
		const int				size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				// Source is tightly packed; destination follows the std430 layout
				// (per-element stride plus matrix column stride when applicable).
				const int		srcOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
				const int		dstOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						// Two-element array: [1] stays zero so the size-byte (4-byte)
						// memcpy below zero-fills the upper 16 bits of the slot.
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1803 
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1804 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1805 {
1806 	if (varType.isBasicType())
1807 	{
1808 		const glu::DataType		basicType		= varType.getBasicType();
1809 		const bool				isMatrix		= glu::isDataTypeMatrix(basicType);
1810 		const int				scalarSize		= glu::getDataTypeScalarSize(basicType);
1811 		const int				numVecs			= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1812 		const int				numComps		= scalarSize / numVecs;
1813 
1814 		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1815 		{
1816 			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1817 			{
1818 				const int		size			= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1819 				const int		srcOffset		= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1820 				const int		dstOffset		= size * (elemNdx * scalarSize + vecNdx * numComps);
1821 				const deUint8*	srcPtr			= (const deUint8*)srcBasePtr + srcOffset;
1822 				deUint8*		dstPtr			= (deUint8*)dstBasePtr + dstOffset;
1823 
1824 				deMemcpy(dstPtr, srcPtr, size * numComps);
1825 			}
1826 		}
1827 	}
1828 	else
1829 		throw tcu::InternalError("Unsupported type");
1830 }
1831 
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1832 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1833 {
1834 	const VkDevice			vkDevice			= m_context.getDevice();
1835 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1836 
1837 	const deUint32			inputStride			= getLayoutStride(m_inputLayout);
1838 	const int				inputBufferSize		= inputStride * numValues;
1839 
1840 	if (inputBufferSize == 0)
1841 		return; // No inputs
1842 
1843 	DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1844 	for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1845 	{
1846 		const glu::VarType&		varType		= m_shaderSpec.inputs[inputNdx].varType;
1847 		const VarLayout&		layout		= m_inputLayout[inputNdx];
1848 
1849 		copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1850 	}
1851 
1852 	flushAlloc(vk, vkDevice, *m_inputAlloc);
1853 }
1854 
readOutputBuffer(void * const * outputPtrs,int numValues)1855 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1856 {
1857 	const VkDevice			vkDevice			= m_context.getDevice();
1858 	const DeviceInterface&	vk					= m_context.getDeviceInterface();
1859 
1860 	DE_ASSERT(numValues > 0); // At least some outputs are required.
1861 
1862 	invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1863 
1864 	DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1865 	for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1866 	{
1867 		const glu::VarType&		varType		= m_shaderSpec.outputs[outputNdx].varType;
1868 		const VarLayout&		layout		= m_outputLayout[outputNdx];
1869 
1870 		copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1871 	}
1872 }
1873 
initBuffers(int numValues)1874 void BufferIoExecutor::initBuffers (int numValues)
1875 {
1876 	const deUint32				inputStride			= getLayoutStride(m_inputLayout);
1877 	const deUint32				outputStride		= getLayoutStride(m_outputLayout);
1878 	// Avoid creating zero-sized buffer/memory
1879 	const size_t				inputBufferSize		= de::max(numValues * inputStride, 1u);
1880 	const size_t				outputBufferSize	= numValues * outputStride;
1881 
1882 	// Upload data to buffer
1883 	const VkDevice				vkDevice			= m_context.getDevice();
1884 	const DeviceInterface&		vk					= m_context.getDeviceInterface();
1885 	const deUint32				queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
1886 	Allocator&					memAlloc			= m_context.getDefaultAllocator();
1887 
1888 	const VkBufferCreateInfo inputBufferParams =
1889 	{
1890 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1891 		DE_NULL,									// const void*			pNext;
1892 		0u,											// VkBufferCreateFlags	flags;
1893 		inputBufferSize,							// VkDeviceSize			size;
1894 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1895 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1896 		1u,											// deUint32				queueFamilyCount;
1897 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1898 	};
1899 
1900 	m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1901 	m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1902 
1903 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1904 
1905 	const VkBufferCreateInfo outputBufferParams =
1906 	{
1907 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
1908 		DE_NULL,									// const void*			pNext;
1909 		0u,											// VkBufferCreateFlags	flags;
1910 		outputBufferSize,							// VkDeviceSize			size;
1911 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,			// VkBufferUsageFlags	usage;
1912 		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
1913 		1u,											// deUint32				queueFamilyCount;
1914 		&queueFamilyIndex							// const deUint32*		pQueueFamilyIndices;
1915 	};
1916 
1917 	m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1918 	m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1919 
1920 	VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1921 }
1922 
1923 // ComputeShaderExecutor
1924 
// Executes the shader spec in a compute shader, one invocation per value,
// using the BufferIoExecutor SSBO plumbing for inputs and outputs.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~ComputeShaderExecutor	(void);

	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Runs the compute pipeline over numValues values and reads back results.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateComputeShader	(const ShaderSpec& spec);

private:
	// Caller-provided layout handle for extra resources; stored, not owned.
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
1941 
// Stores the extra-resources layout handle; buffers are set up by the base class.
ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
1947 
// m_extraResourcesLayout is not owned, so there is nothing to release here.
ComputeShaderExecutor::~ComputeShaderExecutor	(void)
{
}
1951 
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1952 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1953 {
1954 	switch(type)
1955 	{
1956 	case glu::TYPE_FLOAT16:
1957 		return "%f16";
1958 	case glu::TYPE_FLOAT16_VEC2:
1959 		return "%v2f16";
1960 	case glu::TYPE_FLOAT16_VEC3:
1961 		return "%v3f16";
1962 	case glu::TYPE_FLOAT16_VEC4:
1963 		return "%v4f16";
1964 	case glu::TYPE_FLOAT:
1965 		return packFloat16Bit ? "%u32" : "%f32";		// f16 values will be bitcast from ui32.
1966 	case glu::TYPE_FLOAT_VEC2:
1967 		return packFloat16Bit ? "%v2u32" : "%v2f32";	// f16 values will be bitcast from ui32.
1968 	case glu::TYPE_FLOAT_VEC3:
1969 		return packFloat16Bit ? "%v3u32" : "%v3f32";	// f16 values will be bitcast from ui32.
1970 	case glu::TYPE_FLOAT_VEC4:
1971 		return packFloat16Bit ? "%v4u32" : "%v4f32";	// f16 values will be bitcast from ui32.
1972 	case glu::TYPE_INT:
1973 		return "%i32";
1974 	case glu::TYPE_INT_VEC2:
1975 		return "%v2i32";
1976 	case glu::TYPE_INT_VEC3:
1977 		return "%v3i32";
1978 	case glu::TYPE_INT_VEC4:
1979 		return "%v4i32";
1980 	case glu::TYPE_DOUBLE:
1981 		return "%f64";
1982 	case glu::TYPE_DOUBLE_VEC2:
1983 		return "%v2f64";
1984 	case glu::TYPE_DOUBLE_VEC3:
1985 		return "%v3f64";
1986 	case glu::TYPE_DOUBLE_VEC4:
1987 		return "%v4f64";
1988 	default:
1989 		DE_ASSERT(0);
1990 		return "";
1991 		break;
1992 	}
1993 }
1994 
// Emits SPIR-V that shifts the i32 value stored in variableName one bit to
// the left (i.e. advances the per-comparison operation bit to the next
// operation). operationNdx uniquifies the generated result ids.
std::string moveBitOperation (std::string variableName, const int operationNdx)
{
	std::ostringstream	code;

	code << "\n";
	code << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n";
	code << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n";
	code << "OpStore " << variableName << " %move1_" << operationNdx << "\n";

	return code.str();
}
2004 
scalarComparison(const std::string operation,const int operationNdx,const glu::DataType type,const std::string & outputType,const int scalarSize)2005 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
2006 {
2007 	std::ostringstream	src;
2008 	std::string			boolType;
2009 
2010 	switch (type)
2011 	{
2012 	case glu::TYPE_FLOAT16:
2013 	case glu::TYPE_FLOAT:
2014 	case glu::TYPE_DOUBLE:
2015 		src << "\n"
2016 			<< "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2017 			<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
2018 			<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
2019 			<< "%label_IF_" << operationNdx << " = OpLabel\n"
2020 			<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2021 			<< "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2022 			<< "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
2023 			<< "OpStore %out0 %add_if_" << operationNdx << "\n"
2024 			<< "OpBranch %IF_" << operationNdx << "\n"
2025 			<< "%IF_" << operationNdx << " = OpLabel\n";
2026 		return src.str();
2027 	case glu::TYPE_FLOAT16_VEC2:
2028 	case glu::TYPE_FLOAT_VEC2:
2029 	case glu::TYPE_DOUBLE_VEC2:
2030 		boolType = "%v2bool";
2031 		break;
2032 	case glu::TYPE_FLOAT16_VEC3:
2033 	case glu::TYPE_FLOAT_VEC3:
2034 	case glu::TYPE_DOUBLE_VEC3:
2035 		boolType = "%v3bool";
2036 		break;
2037 	case glu::TYPE_FLOAT16_VEC4:
2038 	case glu::TYPE_FLOAT_VEC4:
2039 	case glu::TYPE_DOUBLE_VEC4:
2040 		boolType = "%v4bool";
2041 		break;
2042 	default:
2043 		DE_ASSERT(0);
2044 		return "";
2045 		break;
2046 	}
2047 
2048 	src << "\n"
2049 		<< "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2050 		<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2051 		<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2052 
2053 	src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2054 	for(int ndx = 0; ndx < scalarSize; ++ndx)
2055 		src << " %operation_val_" << operationNdx;
2056 	src << "\n";
2057 
2058 	src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
2059 		<< "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"
2060 
2061 		<< "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
2062 		<< "OpStore %out0 %add_if_" << operationNdx << "\n";
2063 
2064 	return src.str();
2065 }
2066 
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2067 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2068 {
2069 	static const std::string COMPARE_OPERATIONS[] =
2070 	{
2071 		"OpFOrdEqual",
2072 		"OpFOrdGreaterThan",
2073 		"OpFOrdLessThan",
2074 		"OpFOrdGreaterThanEqual",
2075 		"OpFOrdLessThanEqual",
2076 		"OpFUnordEqual",
2077 		"OpFUnordGreaterThan",
2078 		"OpFUnordLessThan",
2079 		"OpFUnordGreaterThanEqual",
2080 		"OpFUnordLessThanEqual"
2081 	};
2082 
2083 	int					moveBitNdx		= 0;
2084 	vector<std::string>	inputTypes;
2085 	vector<std::string>	outputTypes;
2086 	const std::string	packType		= spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2087 
2088 	vector<bool>		floatResult;
2089 	for (const auto& symbol : spec.outputs)
2090 		floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2091 
2092 	const bool			anyFloatResult	= std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2093 
2094 	vector<bool>		packFloatRes;
2095 	for (const auto& floatRes : floatResult)
2096 		packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2097 
2098 	const bool			useF32Types		= (!are16Bit && !are64Bit);
2099 	const bool			useF64Types		= are64Bit;
2100 	const bool			useF16Types		= (spec.packFloat16Bit || are16Bit);
2101 
2102 	for (const auto& symbol : spec.inputs)
2103 		inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2104 
2105 	for (const auto& symbol : spec.outputs)
2106 		outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2107 
2108 	DE_ASSERT(!inputTypes.empty());
2109 	DE_ASSERT(!outputTypes.empty());
2110 
2111 	// Assert input and output types match the expected operations.
2112 	switch (spec.spirvCase)
2113 	{
2114 	case SPIRV_CASETYPE_COMPARE:
2115 	case SPIRV_CASETYPE_FREM:
2116 		DE_ASSERT(inputTypes.size() == 2);
2117 		DE_ASSERT(outputTypes.size() == 1);
2118 		break;
2119 	case SPIRV_CASETYPE_MODFSTRUCT:
2120 	case SPIRV_CASETYPE_FREXPSTRUCT:
2121 		DE_ASSERT(inputTypes.size() == 1);
2122 		DE_ASSERT(outputTypes.size() == 2);
2123 		break;
2124 	default:
2125 		DE_ASSERT(false);
2126 		break;
2127 	};
2128 
2129 	std::ostringstream	src;
2130 	src << "; SPIR-V\n"
2131 		"; Version: 1.0\n"
2132 		"; Generator: Khronos Glslang Reference Front End; 4\n"
2133 		"; Bound: 114\n"
2134 		"; Schema: 0\n"
2135 		"OpCapability Shader\n";
2136 
2137 	if (useF16Types)
2138 		src << "OpCapability Float16\n";
2139 
2140 	if (are16Bit)
2141 		src << "OpCapability StorageBuffer16BitAccess\n"
2142 			"OpCapability UniformAndStorageBuffer16BitAccess\n";
2143 
2144 	if (useF64Types)
2145 		src << "OpCapability Float64\n";
2146 
2147 	if (are16Bit)
2148 		src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2149 
2150 	src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2151 		"OpMemoryModel Logical GLSL450\n"
2152 		"OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2153 		"OpExecutionMode %BP_main LocalSize 1 1 1\n"
2154 		"OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2155 		"OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2156 
2157 	// Input offsets and stride.
2158 	{
2159 		int offset	= 0;
2160 		int ndx		= 0;
2161 		int largest	= 0;
2162 		for (const auto& symbol : spec.inputs)
2163 		{
2164 			const int scalarSize		= symbol.varType.getScalarSize();
2165 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2166 			const int extraMemberBytes	= (offset % memberSize);
2167 
2168 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2169 			src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2170 			++ndx;
2171 
2172 			if (memberSize > largest)
2173 				largest = memberSize;
2174 
2175 			offset += memberSize;
2176 		}
2177 		DE_ASSERT(largest > 0);
2178 		const int extraBytes	= (offset % largest);
2179 		const int stride		= offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2180 		src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2181 	}
2182 
2183 	src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2184 		"OpDecorate %ssboIN BufferBlock\n"
2185 		"OpDecorate %ssbo_src DescriptorSet 0\n"
2186 		"OpDecorate %ssbo_src Binding 0\n"
2187 		"\n";
2188 
2189 	if (isMediump)
2190 	{
2191 		for (size_t i = 0; i < inputTypes.size(); ++i)
2192 		{
2193 			src <<
2194 				"OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2195 				"OpDecorate %in" << i << " RelaxedPrecision\n"
2196 				"OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2197 				"OpDecorate %in" << i << "_val RelaxedPrecision\n"
2198 				;
2199 		}
2200 
2201 			if (anyFloatResult)
2202 			{
2203 				switch (spec.spirvCase)
2204 				{
2205 				case SPIRV_CASETYPE_FREM:
2206 					src << "OpDecorate %frem_result RelaxedPrecision\n";
2207 					break;
2208 				case SPIRV_CASETYPE_MODFSTRUCT:
2209 					src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2210 					break;
2211 				case SPIRV_CASETYPE_FREXPSTRUCT:
2212 					src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2213 					break;
2214 				default:
2215 					DE_ASSERT(false);
2216 					break;
2217 				}
2218 
2219 				for (size_t i = 0; i < outputTypes.size(); ++i)
2220 				{
2221 					src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2222 					src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2223 					src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2224 				}
2225 			}
2226 	}
2227 
2228 	// Output offsets and stride.
2229 	{
2230 		int offset	= 0;
2231 		int ndx		= 0;
2232 		int largest	= 0;
2233 		for (const auto& symbol : spec.outputs)
2234 		{
2235 			const int scalarSize		= symbol.varType.getScalarSize();
2236 			const int memberSize		= (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2237 			const int extraMemberBytes	= (offset % memberSize);
2238 
2239 			offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2240 			src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2241 			++ndx;
2242 
2243 			if (memberSize > largest)
2244 				largest = memberSize;
2245 
2246 			offset += memberSize;
2247 		}
2248 		DE_ASSERT(largest > 0);
2249 		const int extraBytes	= (offset % largest);
2250 		const int stride		= offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2251 		src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2252 	}
2253 
2254 	src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2255 		"OpDecorate %ssboOUT BufferBlock\n"
2256 		"OpDecorate %ssbo_dst DescriptorSet 0\n"
2257 		"OpDecorate %ssbo_dst Binding 1\n"
2258 		"\n"
2259 		"%void  = OpTypeVoid\n"
2260 		"%bool  = OpTypeBool\n"
2261 		"%v2bool = OpTypeVector %bool 2\n"
2262 		"%v3bool = OpTypeVector %bool 3\n"
2263 		"%v4bool = OpTypeVector %bool 4\n"
2264 		"%u32   = OpTypeInt 32 0\n";
2265 
2266 	if (useF32Types)
2267 		src << "%f32   = OpTypeFloat 32\n"
2268 			"%v2f32 = OpTypeVector %f32 2\n"
2269 			"%v3f32 = OpTypeVector %f32 3\n"
2270 			"%v4f32 = OpTypeVector %f32 4\n";
2271 
2272 	if (useF64Types)
2273 		src << "%f64   = OpTypeFloat 64\n"
2274 			"%v2f64 = OpTypeVector %f64 2\n"
2275 			"%v3f64 = OpTypeVector %f64 3\n"
2276 			"%v4f64 = OpTypeVector %f64 4\n";
2277 
2278 	if (useF16Types)
2279 		src << "%f16   = OpTypeFloat 16\n"
2280 			"%v2f16 = OpTypeVector %f16 2\n"
2281 			"%v3f16 = OpTypeVector %f16 3\n"
2282 			"%v4f16 = OpTypeVector %f16 4\n";
2283 
2284 	src << "%i32   = OpTypeInt 32 1\n"
2285 		"%v2i32 = OpTypeVector %i32 2\n"
2286 		"%v3i32 = OpTypeVector %i32 3\n"
2287 		"%v4i32 = OpTypeVector %i32 4\n"
2288 		"%v2u32 = OpTypeVector %u32 2\n"
2289 		"%v3u32 = OpTypeVector %u32 3\n"
2290 		"%v4u32 = OpTypeVector %u32 4\n"
2291 		"\n"
2292 		"%ip_u32   = OpTypePointer Input %u32\n"
2293 		"%ip_v3u32 = OpTypePointer Input %v3u32\n"
2294 		"%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2295 		"\n"
2296 		"%fp_operation = OpTypePointer Function %i32\n"
2297 		"%voidf        = OpTypeFunction %void\n"
2298 		"%fp_u32       = OpTypePointer Function %u32\n"
2299 		"%fp_it1       = OpTypePointer Function " << inputTypes[0] << "\n"
2300 		;
2301 
2302 	for (size_t i = 0; i < outputTypes.size(); ++i)
2303 	{
2304 		src << "%fp_out_" << i << "     = OpTypePointer Function " << outputTypes[i] << "\n"
2305 			<< "%up_out_" << i << "     = OpTypePointer Uniform " << outputTypes[i] << "\n";
2306 	}
2307 
2308 	if (spec.packFloat16Bit)
2309 		src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2310 
2311 	src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2312 		"%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2313 		"\n"
2314 		"%c_u32_0 = OpConstant %u32 0\n"
2315 		"%c_u32_1 = OpConstant %u32 1\n"
2316 		"%c_u32_2 = OpConstant %u32 2\n"
2317 		"%c_i32_0 = OpConstant %i32 0\n"
2318 		"%c_i32_1 = OpConstant %i32 1\n"
2319 		"\n";
2320 
2321 	if (useF32Types)
2322 		src <<
2323 			"%c_f32_0 = OpConstant %f32 0\n"
2324 			"%c_f32_1 = OpConstant %f32 1\n"
2325 			;
2326 
2327 	if (useF16Types)
2328 		src <<
2329 			"%c_f16_0 = OpConstant %f16 0\n"
2330 			"%c_f16_1 = OpConstant %f16 1\n"
2331 			"%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2332 			;
2333 
2334 	if (useF64Types)
2335 		src <<
2336 			"%c_f64_0 = OpConstant %f64 0\n"
2337 			"%c_f64_1 = OpConstant %f64 1\n"
2338 		;
2339 
2340 	src << "\n"
2341 		"%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2342 		"%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2343 		"%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2344 		"%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2345 		"%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2346 		"%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2347 		"\n";
2348 
2349 	if (useF32Types)
2350 		src <<
2351 			"%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2352 			"%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2353 			"%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2354 			"%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2355 			"%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2356 			"%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2357 			;
2358 
2359 	if (useF16Types)
2360 		src <<
2361 			"%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2362 			"%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2363 			"%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2364 			"%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2365 			"%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2366 			"%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2367 			;
2368 
2369 	if (useF64Types)
2370 		src <<
2371 			"%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2372 			"%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2373 			"%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2374 			"%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2375 			"%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2376 			"%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2377 			"\n";
2378 
2379 	// Input struct.
2380 	{
2381 		src << "%SSB0_IN    = OpTypeStruct";
2382 		for (const auto& t : inputTypes)
2383 			src << " " << t;
2384 		src << "\n";
2385 	}
2386 
2387 	src <<
2388 		"%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2389 		"%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2390 		"%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2391 		"%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2392 		"\n";
2393 
2394 	// Output struct.
2395 	{
2396 		src << "%SSB0_OUT    = OpTypeStruct";
2397 		for (const auto& t : outputTypes)
2398 			src << " " << t;
2399 		src << "\n";
2400 	}
2401 
2402 	std::string modfStructMemberType;
2403 	std::string frexpStructFirstMemberType;
2404 	if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2405 	{
2406 		modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2407 		src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2408 	}
2409 	else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2410 	{
2411 		frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2412 		src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2413 	}
2414 
2415 	src <<
2416 		"%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2417 		"%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2418 		"%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2419 		"%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2420 		"\n"
2421 		"%BP_main = OpFunction %void None %voidf\n"
2422 		"%BP_label = OpLabel\n"
2423 		"%invocationNdx = OpVariable %fp_u32 Function\n";
2424 
2425 	// Note: here we are supposing all inputs have the same type.
2426 	for (size_t i = 0; i < inputTypes.size(); ++i)
2427 		src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2428 
2429 	for (size_t i = 0; i < outputTypes.size(); ++i)
2430 		src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2431 
2432 	src << "%operation = OpVariable %fp_operation Function\n"
2433 		"%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2434 		"%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2435 		"%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2436 		"%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2437 		"%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2438 		"%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2439 		"%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2440 		"%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2441 		"%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2442 		"%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2443 		"\n"
2444 		"%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2445 		"%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2446 		"%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2447 		"%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2448 		"%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2449 		"OpStore %invocationNdx %add_2\n"
2450 		"%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2451 
2452 	// Load input values.
2453 	for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2454 	{
2455 		src << "\n"
2456 			<< "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2457 			<< "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2458 
2459 		if (spec.packFloat16Bit)
2460 		{
2461 			if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2462 			{
2463 				// Extract the val<inputNdx> u32 input channels into individual f16 values.
2464 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2465 				{
2466 					src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2467 						"%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2468 						"%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2469 				}
2470 
2471 				// Construct the input vector.
2472 				src << "%val_f16_0_" << inputNdx << "   = OpCompositeConstruct " << packType;
2473 				for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2474 				{
2475 					src << " %val_f16_0_" << inputNdx << "_" << i;
2476 				}
2477 
2478 				src << "\n";
2479 				src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2480 			}
2481 			else
2482 			{
2483 				src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2484 					"%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2485 
2486 				src <<	"OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2487 			}
2488 		}
2489 		else
2490 			src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2491 
2492 		src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2493 	}
2494 
2495 	src << "\n"
2496 		"OpStore %operation %c_i32_1\n";
2497 
2498 	// Fill output values with dummy data.
2499 	for (size_t i = 0; i < outputTypes.size(); ++i)
2500 		src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2501 
2502 	src << "\n";
2503 
2504 	// Run operation.
2505 	switch (spec.spirvCase)
2506 	{
2507 	case SPIRV_CASETYPE_COMPARE:
2508 		for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2509 		{
2510 			src << scalarComparison	(COMPARE_OPERATIONS[operationNdx], operationNdx,
2511 									spec.inputs[0].varType.getBasicType(),
2512 									outputTypes[0],
2513 									spec.outputs[0].varType.getScalarSize());
2514 			src << moveBitOperation("%operation", moveBitNdx);
2515 			++moveBitNdx;
2516 		}
2517 		break;
2518 	case SPIRV_CASETYPE_FREM:
2519 		src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2520 			<< "OpStore %out0 %frem_result\n";
2521 		break;
2522 	case SPIRV_CASETYPE_MODFSTRUCT:
2523 		src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2524 			<< "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2525 			<< "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2526 			<< "OpStore %out0 %modfstruct_result_0\n"
2527 			<< "OpStore %out1 %modfstruct_result_1\n";
2528 		break;
2529 	case SPIRV_CASETYPE_FREXPSTRUCT:
2530 		src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2531 			<< "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2532 			<< "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2533 			<< "OpStore %out0 %frexpstruct_result_0\n"
2534 			<< "OpStore %out1 %frexpstruct_result_1\n";
2535 		break;
2536 	default:
2537 		DE_ASSERT(false);
2538 		break;
2539 	}
2540 
2541 	for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2542 	{
2543 		src << "\n"
2544 			"%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2545 			"%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2546 
2547 		if (packFloatRes[outputNdx])
2548 		{
2549 			if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2550 			{
2551 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2552 				{
2553 					src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2554 					src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2555 					src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2556 				}
2557 
2558 				src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2559 				for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2560 					src << " %u32_val_" << outputNdx << "_" << i;
2561 				src << "\n";
2562 				src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2563 			}
2564 			else
2565 			{
2566 				src <<
2567 					"%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2568 					"%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2569 					"OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2570 			}
2571 		}
2572 		else
2573 		{
2574 			src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2575 		}
2576 	}
2577 
2578 	src << "\n"
2579 		"OpReturn\n"
2580 		"OpFunctionEnd\n";
2581 
2582 	return src.str();
2583 }
2584 
2585 
generateComputeShader(const ShaderSpec & spec)2586 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2587 {
2588 	if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2589 	{
2590 		bool	are16Bit	= false;
2591 		bool	are64Bit	= false;
2592 		bool	isMediump	= false;
2593 		for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2594 		{
2595 			if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2596 				are16Bit = true;
2597 
2598 			if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2599 				are64Bit = true;
2600 
2601 			if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2602 				isMediump = true;
2603 
2604 			if (isMediump && are16Bit)
2605 				break;
2606 		}
2607 
2608 		return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2609 	}
2610 	else
2611 	{
2612 		std::ostringstream src;
2613 		src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2614 
2615 		if (!spec.globalDeclarations.empty())
2616 			src << spec.globalDeclarations << "\n";
2617 
2618 		src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2619 			<< "\n";
2620 
2621 		declareBufferBlocks(src, spec);
2622 
2623 		src << "void main (void)\n"
2624 			<< "{\n"
2625 			<< "	uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2626 			<< "	                   + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2627 
2628 		generateExecBufferIo(src, spec, "invocationNdx");
2629 
2630 		src << "}\n";
2631 
2632 		return src.str();
2633 	}
2634 }
2635 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2636 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2637 {
2638 	if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2639 		programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2640 	else
2641 		programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2642 }
2643 
// Runs the compute shader over numValues values: uploads inputs, builds the
// descriptor sets / pipeline, dispatches in batches of at most
// maxComputeWorkGroupSize[0] workgroups (one value per 1x1x1 workgroup) and
// reads the results back into 'outputs'. 'extraResources' (set 1) is bound
// only when an extra-resources layout was supplied at construction.
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice				= m_context.getDevice();
	const DeviceInterface&			vk						= m_context.getDeviceInterface();
	const VkQueue					queue					= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex		= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	const deUint32					numDescriptorSets		= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// An extra-resources set must be provided iff a layout for it was given.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create descriptor set layout and pool: two storage-buffer bindings
	// (input and output; see INPUT_BUFFER_BINDING / OUTPUT_BUFFER_BINDING).

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout			descriptorSetLayouts[]	=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo	pipelineLayoutParams	=
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						setLayoutCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,		// VkStructureType						sType;
				DE_NULL,													// const void*							pNext;
				(VkPipelineShaderStageCreateFlags)0u,						// VkPipelineShaderStageCreateFlags		flags;
				VK_SHADER_STAGE_COMPUTE_BIT,								// VkShaderStageFlagsBit				stage;
				*computeShaderModule,										// VkShaderModule						shader;
				"main",														// const char*							pName;
				DE_NULL														// const VkSpecializationInfo*			pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,		// VkStructureType									sType;
			DE_NULL,											// const void*										pNext;
			(VkPipelineCreateFlags)0,							// VkPipelineCreateFlags							flags;
			*shaderStageParams,									// VkPipelineShaderStageCreateInfo					cs;
			*pipelineLayout,									// VkPipelineLayout									layout;
			0u,													// VkPipeline										basePipelineHandle;
			0u,													// int32_t											basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// One value per workgroup: cap the per-dispatch batch size.
	// NOTE(review): maxComputeWorkGroupSize[0] is a conservative cap here; the
	// actual dispatch limit is maxComputeWorkGroupCount[0], which is never
	// smaller — confirm if larger batches are ever needed.
	const int			maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int					curOffset				= 0;
	const deUint32		inputStride				= getInputStride();
	const deUint32		outputStride			= getOutputStride();

	// Process the values in batches; each iteration records, submits and waits
	// for one dispatch covering numToExec values.
	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>	cmdBuffer;
		const int				numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors to point at this batch's slice of the buffers.
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// Shaders with no inputs have a zero input stride and no input buffer.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,					// VkBuffer			buffer;
					curOffset * inputStride,		// VkDeviceSize		offset;
					numToExec * inputStride			// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								0, (const VkMemoryBarrier*)DE_NULL,
								1, &bufferBarrier,
								0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2833 
2834 // Tessellation utils
2835 
// Minimal pass-through vertex shader used by the tessellation executors: the
// position is derived from gl_VertexIndex, so no vertex inputs are required.
static std::string generateVertexShaderForTess (void)
{
	return
		"#version 450\n"
		"void main (void)\n{\n"
		"\tgl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
		"}\n";
}
2846 
// Common base for the tessellation-control and tessellation-evaluation
// executors. Provides renderTess(), which builds a one-shot graphics
// pipeline (vert + tesc + tese + frag) and issues a single patch-list draw
// that performs the buffer-based shader I/O set up by BufferIoExecutor.
class TessellationExecutor : public BufferIoExecutor
{
public:
					TessellationExecutor		(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual			~TessellationExecutor		(void);

	// Records and submits one draw of 'vertexCount' vertices arranged into
	// patches of 'patchControlPoints' control points. 'extraResources' is an
	// optional second descriptor set (must be non-null iff a non-null extra
	// resources layout was given at construction).
	void			renderTess					(deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Optional layout for a caller-provided descriptor set; 0 when unused.
	const VkDescriptorSetLayout					m_extraResourcesLayout;
};
2858 
// Throws NotSupportedError early when the physical device does not expose
// the (optional) tessellationShader feature, so individual tests are skipped
// rather than failing at pipeline creation.
TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();

	if (!features.tessellationShader)
		TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
}
2868 
// Nothing to release here; all Vulkan objects used by renderTess() are
// scoped to that call, and buffers are owned by BufferIoExecutor.
TessellationExecutor::~TessellationExecutor (void)
{
}
2872 
// Builds a complete single-use graphics pipeline (render pass, framebuffer,
// descriptors, shader modules, pipeline) and records one patch-list draw of
// 'vertexCount' vertices with 'patchControlPoints' control points per patch,
// then submits it and waits for completion. The tessellation shaders read
// from / write to the SSBOs managed by BufferIoExecutor; the color image is
// only a dummy attachment required by the render pass.
void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
{
	const size_t						inputBufferSize				= numValues * getInputStride();
	const VkDevice						vkDevice					= m_context.getDevice();
	const DeviceInterface&				vk							= m_context.getDeviceInterface();
	const VkQueue						queue						= m_context.getUniversalQueue();
	const deUint32						queueFamilyIndex			= m_context.getUniversalQueueFamilyIndex();
	Allocator&							memAlloc					= m_context.getDefaultAllocator();

	const tcu::UVec2					renderSize					(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

	Move<VkImage>						colorImage;
	de::MovePtr<Allocation>				colorImageAlloc;
	VkFormat							colorFormat					= VK_FORMAT_R8G8B8A8_UNORM;
	Move<VkImageView>					colorImageView;

	Move<VkRenderPass>					renderPass;
	Move<VkFramebuffer>					framebuffer;
	Move<VkPipelineLayout>				pipelineLayout;
	Move<VkPipeline>					graphicsPipeline;

	Move<VkShaderModule>				vertexShaderModule;
	Move<VkShaderModule>				tessControlShaderModule;
	Move<VkShaderModule>				tessEvalShaderModule;
	Move<VkShaderModule>				fragmentShaderModule;

	Move<VkCommandPool>					cmdPool;
	Move<VkCommandBuffer>				cmdBuffer;

	Move<VkDescriptorPool>				descriptorPool;
	Move<VkDescriptorSetLayout>			descriptorSetLayout;
	Move<VkDescriptorSet>				descriptorSet;
	// A second set is bound only when the caller supplied an extra-resources layout.
	const deUint32						numDescriptorSets			= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// Layout and set must be provided (or omitted) together.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create color image
	{
		const VkImageCreateInfo colorImageParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,										// VkStructureType			sType;
			DE_NULL,																	// const void*				pNext;
			0u,																			// VkImageCreateFlags		flags;
			VK_IMAGE_TYPE_2D,															// VkImageType				imageType;
			colorFormat,																// VkFormat					format;
			{ renderSize.x(), renderSize.y(), 1u },										// VkExtent3D				extent;
			1u,																			// deUint32					mipLevels;
			1u,																			// deUint32					arraySize;
			VK_SAMPLE_COUNT_1_BIT,														// VkSampleCountFlagBits	samples;
			VK_IMAGE_TILING_OPTIMAL,													// VkImageTiling			tiling;
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,		// VkImageUsageFlags		usage;
			VK_SHARING_MODE_EXCLUSIVE,													// VkSharingMode			sharingMode;
			1u,																			// deUint32					queueFamilyCount;
			&queueFamilyIndex,															// const deUint32*			pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED													// VkImageLayout			initialLayout;
		};

		colorImage = createImage(vk, vkDevice, &colorImageParams);

		// Allocate and bind color image memory
		colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
		VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
	}

	// Create color attachment view
	{
		const VkImageViewCreateInfo colorImageViewParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,			// VkStructureType			sType;
			DE_NULL,											// const void*				pNext;
			0u,													// VkImageViewCreateFlags	flags;
			*colorImage,										// VkImage					image;
			VK_IMAGE_VIEW_TYPE_2D,								// VkImageViewType			viewType;
			colorFormat,										// VkFormat					format;
			{
				VK_COMPONENT_SWIZZLE_R,							// VkComponentSwizzle		r;
				VK_COMPONENT_SWIZZLE_G,							// VkComponentSwizzle		g;
				VK_COMPONENT_SWIZZLE_B,							// VkComponentSwizzle		b;
				VK_COMPONENT_SWIZZLE_A							// VkComponentSwizzle		a;
			},													// VkComponentsMapping		components;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,						// VkImageAspectFlags		aspectMask;
				0u,												// deUint32					baseMipLevel;
				1u,												// deUint32					mipLevels;
				0u,												// deUint32					baseArraylayer;
				1u												// deUint32					layerCount;
			}													// VkImageSubresourceRange	subresourceRange;
		};

		colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
	}

	// Create render pass
	{
		const VkAttachmentDescription colorAttachmentDescription =
		{
			0u,													// VkAttachmentDescriptorFlags	flags;
			colorFormat,										// VkFormat						format;
			VK_SAMPLE_COUNT_1_BIT,								// VkSampleCountFlagBits		samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR,						// VkAttachmentLoadOp			loadOp;
			VK_ATTACHMENT_STORE_OP_STORE,						// VkAttachmentStoreOp			storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE,					// VkAttachmentLoadOp			stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE,					// VkAttachmentStoreOp			stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED,							// VkImageLayout				initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout				finalLayout
		};

		const VkAttachmentDescription attachments[1] =
		{
			colorAttachmentDescription
		};

		const VkAttachmentReference colorAttachmentReference =
		{
			0u,													// deUint32			attachment;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL			// VkImageLayout	layout;
		};

		const VkSubpassDescription subpassDescription =
		{
			0u,													// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,					// VkPipelineBindPoint			pipelineBindPoint;
			0u,													// deUint32						inputCount;
			DE_NULL,											// const VkAttachmentReference*	pInputAttachments;
			1u,													// deUint32						colorCount;
			&colorAttachmentReference,							// const VkAttachmentReference*	pColorAttachments;
			DE_NULL,											// const VkAttachmentReference*	pResolveAttachments;
			DE_NULL,											// VkAttachmentReference		depthStencilAttachment;
			0u,													// deUint32						preserveCount;
			DE_NULL												// const VkAttachmentReference* pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,			// VkStructureType					sType;
			DE_NULL,											// const void*						pNext;
			0u,													// VkRenderPassCreateFlags			flags;
			1u,													// deUint32							attachmentCount;
			attachments,										// const VkAttachmentDescription*	pAttachments;
			1u,													// deUint32							subpassCount;
			&subpassDescription,								// const VkSubpassDescription*		pSubpasses;
			0u,													// deUint32							dependencyCount;
			DE_NULL												// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,			// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			0u,													// VkFramebufferCreateFlags		flags;
			*renderPass,										// VkRenderPass					renderPass;
			1u,													// deUint32						attachmentCount;
			&*colorImageView,									// const VkAttachmentBindInfo*	pAttachments;
			(deUint32)renderSize.x(),							// deUint32						width;
			(deUint32)renderSize.y(),							// deUint32						height;
			1u													// deUint32						layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create descriptors
	{
		DescriptorPoolBuilder		descriptorPoolBuilder;
		DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

		// Two SSBO bindings: input buffer and output buffer (see BufferIoExecutor).
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
		descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		const VkDescriptorSetAllocateInfo allocInfo =
		{
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			DE_NULL,
			*descriptorPool,
			1u,
			&*descriptorSetLayout
		};

		descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				0u,								// VkDeviceSize		offset;
				VK_WHOLE_SIZE					// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// Buffer handle is patched in below; the input binding is written
			// only when the shader actually consumes input data.
			VkDescriptorBufferInfo inputDescriptorBufferInfo =
			{
				0,							// VkBuffer			buffer;
				0u,							// VkDeviceSize		offset;
				VK_WHOLE_SIZE				// VkDeviceSize		range;
			};

			if (inputBufferSize > 0)
			{
				inputDescriptorBufferInfo.buffer = *m_inputBuffer;

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}
	}

	// Create pipeline layout
	{
		// Second entry is only consumed when numDescriptorSets == 2.
		const VkDescriptorSetLayout			descriptorSetLayouts[]		=
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						descriptorSetCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shader modules (binaries built by the subclass generateSources()).
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		tessControlShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
		tessEvalShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
	}

	// Create pipeline
	{
		// No vertex attributes: positions are synthesized from gl_VertexIndex.
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,		// VkStructureType							sType;
			DE_NULL,														// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,						// VkPipelineVertexInputStateCreateFlags	flags;
			0u,																// deUint32									bindingCount;
			DE_NULL,														// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			0u,																// deUint32									attributeCount;
			DE_NULL,														// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&                        vk
												vkDevice,							// const VkDevice                                device
												*pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
												*vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
												*tessControlShaderModule,			// const VkShaderModule                          tessellationControlShaderModule
												*tessEvalShaderModule,				// const VkShaderModule                          tessellationEvalShaderModule
												DE_NULL,							// const VkShaderModule                          geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule                          fragmentShaderModule
												*renderPass,						// const VkRenderPass                            renderPass
												viewports,							// const std::vector<VkViewport>&                viewports
												scissors,							// const std::vector<VkRect2D>&                  scissors
												VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,	// const VkPrimitiveTopology                     topology
												0u,									// const deUint32                                subpass
												patchControlPoints,					// const deUint32                                patchControlPoints
												&vertexInputStateParams);			// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		const VkClearValue clearValue = getDefaultClearColor();

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		{
			const VkDescriptorSet	descriptorSets[]	= { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

		endRenderPass(vk, *cmdBuffer);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType    sType;
				DE_NULL,									// const void*        pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags      srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags      dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t           dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer           buffer;
				0,											// VkDeviceSize       offset;
				VK_WHOLE_SIZE,								// VkDeviceSize       size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
3205 
3206 // TessControlExecutor
3207 
// Executor that runs the test payload inside a tessellation-control shader.
// One patch is drawn per value; the control shader indexes the I/O buffers
// with gl_PrimitiveID.
class TessControlExecutor : public TessellationExecutor
{
public:
						TessControlExecutor			(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessControlExecutor		(void);

	// Registers vert/tesc/tese/frag GLSL sources for this execution mode.
	static void			generateSources				(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	virtual void		execute						(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the tessellation-control shader that performs the buffer I/O.
	static std::string	generateTessControlShader	(const ShaderSpec& shaderSpec);
};
3221 
// All work is delegated to TessellationExecutor (which also checks the
// tessellationShader device feature).
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3226 
// No resources owned at this level.
TessControlExecutor::~TessControlExecutor (void)
{
}
3230 
generateTessControlShader(const ShaderSpec & shaderSpec)3231 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3232 {
3233 	std::ostringstream src;
3234 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3235 
3236 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3237 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3238 
3239 	if (!shaderSpec.globalDeclarations.empty())
3240 		src << shaderSpec.globalDeclarations << "\n";
3241 
3242 	src << "\nlayout(vertices = 1) out;\n\n";
3243 
3244 	declareBufferBlocks(src, shaderSpec);
3245 
3246 	src << "void main (void)\n{\n";
3247 
3248 	for (int ndx = 0; ndx < 2; ndx++)
3249 		src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3250 
3251 	for (int ndx = 0; ndx < 4; ndx++)
3252 		src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3253 
3254 	src << "\n"
3255 		<< "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3256 
3257 	generateExecBufferIo(src, shaderSpec, "invocationId");
3258 
3259 	src << "}\n";
3260 
3261 	return src.str();
3262 }
3263 
// Returns a do-nothing tessellation-evaluation shader used when the payload
// lives in the control stage; it just forwards the tess coordinate as a
// position so the pipeline is complete.
static std::string generateEmptyTessEvalShader ()
{
	return std::string("#version 450\n"
					   "#extension GL_EXT_tessellation_shader : require\n\n"
					   "layout(triangles, ccw) in;\n"
					   "\nvoid main (void)\n{\n"
					   "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
					   "}\n");
}
3279 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3280 void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3281 {
3282 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3283 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
3284 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
3285 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3286 }
3287 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3288 void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3289 {
3290 	const deUint32	patchSize	= 3;
3291 
3292 	initBuffers(numValues);
3293 
3294 	// Setup input buffer & copy data
3295 	uploadInputBuffer(inputs, numValues, false);
3296 
3297 	renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3298 
3299 	// Read back data
3300 	readOutputBuffer(outputs, numValues);
3301 }
3302 
3303 // TessEvaluationExecutor
3304 
// Executor that runs the test payload inside a tessellation-evaluation
// shader. Values are processed two per patch (isoline tessellation yields
// two evaluation invocations per primitive).
class TessEvaluationExecutor : public TessellationExecutor
{
public:
						TessEvaluationExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~TessEvaluationExecutor	(void);

	// Registers vert/tesc/tese/frag GLSL sources for this execution mode.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the tessellation-evaluation shader that performs the buffer I/O.
	static std::string	generateTessEvalShader	(const ShaderSpec& shaderSpec);
};
3318 
// All work is delegated to TessellationExecutor (which also checks the
// tessellationShader device feature).
TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
{
}
3323 
// No resources owned at this level.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3327 
// Returns a pass-through tessellation-control shader: one vertex per patch
// and all tessellation levels set to 1.0, so the evaluation stage (which
// holds the actual payload) runs with minimal tessellation.
static std::string generatePassthroughTessControlShader (void)
{
	std::ostringstream shader;

	shader << "#version 450\n"
			  "#extension GL_EXT_tessellation_shader : require\n\n"
			  "layout(vertices = 1) out;\n\n"
			  "void main (void)\n{\n";

	// Two inner levels followed by four outer levels, all forced to 1.0.
	for (int levelNdx = 0; levelNdx < 6; ++levelNdx)
	{
		if (levelNdx < 2)
			shader << "\tgl_TessLevelInner[" << levelNdx << "] = 1.0;\n";
		else
			shader << "\tgl_TessLevelOuter[" << (levelNdx - 2) << "] = 1.0;\n";
	}

	shader << "}\n";

	return shader.str();
}
3349 
generateTessEvalShader(const ShaderSpec & shaderSpec)3350 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3351 {
3352 	std::ostringstream src;
3353 
3354 	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3355 
3356 	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3357 		src << "#extension GL_EXT_tessellation_shader : require\n\n";
3358 
3359 	if (!shaderSpec.globalDeclarations.empty())
3360 		src << shaderSpec.globalDeclarations << "\n";
3361 
3362 	src << "\n";
3363 
3364 	src << "layout(isolines, equal_spacing) in;\n\n";
3365 
3366 	declareBufferBlocks(src, shaderSpec);
3367 
3368 	src << "void main (void)\n{\n"
3369 		<< "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3370 		<< "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3371 
3372 	generateExecBufferIo(src, shaderSpec, "invocationId");
3373 
3374 	src	<< "}\n";
3375 
3376 	return src.str();
3377 }
3378 
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)3379 void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
3380 {
3381 	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3382 	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3383 	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3384 	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3385 }
3386 
execute(int numValues,const void * const * inputs,void * const * outputs,VkDescriptorSet extraResources)3387 void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
3388 {
3389 	const int	patchSize		= 2;
3390 	const int	alignedValues	= deAlign32(numValues, patchSize);
3391 
3392 	// Initialize buffers with aligned value count to make room for padding
3393 	initBuffers(alignedValues);
3394 
3395 	// Setup input buffer & copy data
3396 	uploadInputBuffer(inputs, numValues, false);
3397 
3398 	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);
3399 
3400 	// Read back data
3401 	readOutputBuffer(outputs, numValues);
3402 }
3403 
3404 } // anonymous
3405 
3406 // ShaderExecutor
3407 
// Out-of-line virtual destructor; anchors the vtable for the base class.
ShaderExecutor::~ShaderExecutor (void)
{
}
3411 
areInputs16Bit(void) const3412 bool ShaderExecutor::areInputs16Bit (void) const
3413 {
3414 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3415 	{
3416 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3417 			return true;
3418 	}
3419 	return false;
3420 }
3421 
areOutputs16Bit(void) const3422 bool ShaderExecutor::areOutputs16Bit (void) const
3423 {
3424 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3425 	{
3426 		if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3427 			return true;
3428 	}
3429 	return false;
3430 }
3431 
isOutput16Bit(const size_t ndx) const3432 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3433 {
3434 	if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3435 		return true;
3436 	return false;
3437 }
3438 
areInputs64Bit(void) const3439 bool ShaderExecutor::areInputs64Bit (void) const
3440 {
3441 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3442 	{
3443 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3444 			return true;
3445 	}
3446 	return false;
3447 }
3448 
areOutputs64Bit(void) const3449 bool ShaderExecutor::areOutputs64Bit (void) const
3450 {
3451 	for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3452 	{
3453 		if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3454 			return true;
3455 	}
3456 	return false;
3457 }
3458 
isOutput64Bit(const size_t ndx) const3459 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3460 {
3461 	if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3462 		return true;
3463 	return false;
3464 }
3465 
3466 // Utilities
3467 
generateSources(glu::ShaderType shaderType,const ShaderSpec & shaderSpec,vk::SourceCollections & dst)3468 void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
3469 {
3470 	switch (shaderType)
3471 	{
3472 		case glu::SHADERTYPE_VERTEX:					VertexShaderExecutor::generateSources	(shaderSpec, dst);	break;
3473 		case glu::SHADERTYPE_TESSELLATION_CONTROL:		TessControlExecutor::generateSources	(shaderSpec, dst);	break;
3474 		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	TessEvaluationExecutor::generateSources	(shaderSpec, dst);	break;
3475 		case glu::SHADERTYPE_GEOMETRY:					GeometryShaderExecutor::generateSources	(shaderSpec, dst);	break;
3476 		case glu::SHADERTYPE_FRAGMENT:					FragmentShaderExecutor::generateSources	(shaderSpec, dst);	break;
3477 		case glu::SHADERTYPE_COMPUTE:					ComputeShaderExecutor::generateSources	(shaderSpec, dst);	break;
3478 		default:
3479 			TCU_THROW(InternalError, "Unsupported shader type");
3480 	}
3481 }
3482 
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3483 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3484 {
3485 	switch (shaderType)
3486 	{
3487 		case glu::SHADERTYPE_VERTEX:					return new VertexShaderExecutor		(context, shaderSpec, extraResourcesLayout);
3488 		case glu::SHADERTYPE_TESSELLATION_CONTROL:		return new TessControlExecutor		(context, shaderSpec, extraResourcesLayout);
3489 		case glu::SHADERTYPE_TESSELLATION_EVALUATION:	return new TessEvaluationExecutor	(context, shaderSpec, extraResourcesLayout);
3490 		case glu::SHADERTYPE_GEOMETRY:					return new GeometryShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3491 		case glu::SHADERTYPE_FRAGMENT:					return new FragmentShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3492 		case glu::SHADERTYPE_COMPUTE:					return new ComputeShaderExecutor	(context, shaderSpec, extraResourcesLayout);
3493 		default:
3494 			TCU_THROW(InternalError, "Unsupported shader type");
3495 	}
3496 }
3497 
executorSupported(glu::ShaderType shaderType)3498 bool  executorSupported(glu::ShaderType shaderType)
3499 {
3500 	switch (shaderType)
3501 	{
3502 	case glu::SHADERTYPE_VERTEX:
3503 	case glu::SHADERTYPE_TESSELLATION_CONTROL:
3504 	case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3505 	case glu::SHADERTYPE_GEOMETRY:
3506 	case glu::SHADERTYPE_FRAGMENT:
3507 	case glu::SHADERTYPE_COMPUTE:
3508 		return true;
3509 	default:
3510 		return false;
3511 	}
3512 }
3513 
checkSupportShader(Context & context,const glu::ShaderType shaderType)3514 void checkSupportShader(Context& context, const glu::ShaderType shaderType)
3515 {
3516 	if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
3517 		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
3518 		!context.getPortabilitySubsetFeatures().tessellationIsolines)
3519 	{
3520 		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
3521 	}
3522 }
3523 
3524 
3525 } // shaderexecutor
3526 } // vkt
3527