1 /*------------------------------------------------------------------------
2  * Vulkan Conformance Tests
3  * ------------------------
4  *
5  * Copyright (c) 2015 The Khronos Group Inc.
6  * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7  * Copyright (c) 2016 The Android Open Source Project
8  *
9  * Licensed under the Apache License, Version 2.0 (the "License");
10  * you may not use this file except in compliance with the License.
11  * You may obtain a copy of the License at
12  *
13  *      http://www.apache.org/licenses/LICENSE-2.0
14  *
15  * Unless required by applicable law or agreed to in writing, software
16  * distributed under the License is distributed on an "AS IS" BASIS,
17  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18  * See the License for the specific language governing permissions and
19  * limitations under the License.
20  *
21  *//*!
22  * \file
23  * \brief Vulkan ShaderExecutor
24  *//*--------------------------------------------------------------------*/
25 
26 #include "vktShaderExecutor.hpp"
27 
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38 
39 #include "gluShaderUtil.hpp"
40 
41 #include "tcuVector.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuTextureUtil.hpp"
44 
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deFloat16.h"
49 
50 #include <map>
51 #include <sstream>
52 #include <iostream>
53 
54 using std::vector;
55 using namespace vk;
56 
57 namespace vkt
58 {
59 namespace shaderexecutor
60 {
61 namespace
62 {
63 
64 enum
65 {
66     DEFAULT_RENDER_WIDTH  = 100,
67     DEFAULT_RENDER_HEIGHT = 100,
68 };
69 
70 // Common typedefs
71 
72 typedef de::SharedPtr<Unique<VkImage>> VkImageSp;
73 typedef de::SharedPtr<Unique<VkImageView>> VkImageViewSp;
74 typedef de::SharedPtr<Unique<VkBuffer>> VkBufferSp;
75 typedef de::SharedPtr<Allocation> AllocationSp;
76 
77 static VkFormat getAttributeFormat(const glu::DataType dataType);
78 
79 // Shader utilities
80 
81 static VkClearValue getDefaultClearColor(void)
82 {
83     return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
84 }
85 
86 static std::string generateEmptyFragmentSource(void)
87 {
88     std::ostringstream src;
89 
90     src << "#version 450\n"
91            "layout(location=0) out highp vec4 o_color;\n";
92 
93     src << "void main (void)\n{\n";
94     src << "    o_color = vec4(0.0);\n";
95     src << "}\n";
96 
97     return src.str();
98 }
99 
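// Helper: for every float-typed output symbol, emits GLSL that re-encodes the 16-bit result
// into the corresponding 32-bit "packed_<name>" variable. Each component is packed together
// with a -1.0 filler via packFloat2x16 and bit-cast back to float with uintBitsToFloat, so
// 16-bit results survive the 32-bit output interface. Vectors and matrices are handled
// component by component.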
100 void packFloat16Bit(std::ostream &src, const std::vector<Symbol> &outputs)
101 {
102     for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
103     {
104         if (glu::isDataTypeFloatType(symIter->varType.getBasicType()))
105         {
106             if (glu::isDataTypeVector(symIter->varType.getBasicType()))
107             {
108                 for (int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
109                 {
110                     src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2("
111                         << symIter->name << "[" << i << "], -1.0)));\n";
112                 }
113             }
114             else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
115             {
116                 int maxRow = 0;
117                 int maxCol = 0;
118                 switch (symIter->varType.getBasicType())
119                 {
120                 case glu::TYPE_FLOAT_MAT2:
121                     maxRow = maxCol = 2;
122                     break;
123                 case glu::TYPE_FLOAT_MAT2X3:
124                     maxRow = 2;
125                     maxCol = 3;
126                     break;
127                 case glu::TYPE_FLOAT_MAT2X4:
128                     maxRow = 2;
129                     maxCol = 4;
130                     break;
131                 case glu::TYPE_FLOAT_MAT3X2:
132                     maxRow = 3;
133                     maxCol = 2;
134                     break;
135                 case glu::TYPE_FLOAT_MAT3:
136                     maxRow = maxCol = 3;
137                     break;
138                 case glu::TYPE_FLOAT_MAT3X4:
139                     maxRow = 3;
140                     maxCol = 4;
141                     break;
142                 case glu::TYPE_FLOAT_MAT4X2:
143                     maxRow = 4;
144                     maxCol = 2;
145                     break;
146                 case glu::TYPE_FLOAT_MAT4X3:
147                     maxRow = 4;
148                     maxCol = 3;
149                     break;
150                 case glu::TYPE_FLOAT_MAT4:
151                     maxRow = maxCol = 4;
152                     break;
153                 default:
154                     DE_ASSERT(false);
155                     break;
156                 }
157 
158                 for (int i = 0; i < maxRow; i++)
159                     for (int j = 0; j < maxCol; j++)
160                     {
161                         src << "\tpacked_" << symIter->name << "[" << i << "][" << j
162                             << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j
163                             << "], -1.0)));\n";
164                     }
165             }
166             else
167             {
168                 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name
169                     << ", -1.0)));\n";
170             }
171         }
172     }
173 }
174 
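// Generates a vertex shader that only forwards the test inputs: each input attribute is
// copied unmodified to a matching flat output, for use when the operation itself runs in a
// later pipeline stage.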
175 static std::string generatePassthroughVertexShader(const ShaderSpec &shaderSpec, const char *inputPrefix,
176                                                    const char *outputPrefix)
177 {
178     std::ostringstream src;
179     int location = 0;
180 
181     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
182 
183     if (!shaderSpec.globalDeclarations.empty())
184         src << shaderSpec.globalDeclarations << "\n";
185 
186     src << "layout(location = " << location << ") in highp vec4 a_position;\n";
187 
188     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
189     {
190         location++;
191         src << "layout(location = " << location << ") in " << glu::declare(input->varType, inputPrefix + input->name)
192             << ";\n"
193             << "layout(location = " << location - 1 << ") flat out "
194             << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
195     }
196 
197     src << "\nvoid main (void)\n{\n"
198         << "    gl_Position = a_position;\n"
199         << "    gl_PointSize = 1.0;\n";
200 
201     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
202         src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
203 
204     src << "}\n";
205 
206     return src.str();
207 }
208 
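// Generates a vertex shader that performs the operation itself: inputs are read from vertex
// attributes, shaderSpec.source is inlined into main(), and results are written to flat
// outputs. Boolean outputs are forwarded as ints, and with packFloat16Bit the float results
// are re-packed via packFloat16Bit() above.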
209 static std::string generateVertexShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
210                                         const std::string &outputPrefix)
211 {
212     DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
213 
214     std::ostringstream src;
215 
216     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
217 
218     if (!shaderSpec.globalDeclarations.empty())
219         src << shaderSpec.globalDeclarations << "\n";
220 
221     src << "layout(location = 0) in highp vec4 a_position;\n";
222 
223     int locationNumber = 1;
224     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
225          ++input, ++locationNumber)
226     {
227         src << "layout(location = " << locationNumber << ") in "
228             << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
229     }
230 
231     locationNumber = 0;
232     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
233          ++output, ++locationNumber)
234     {
235         DE_ASSERT(output->varType.isBasicType());
236 
237         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
238         {
239             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
240             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
241             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
242 
243             src << "layout(location = " << locationNumber << ") flat out "
244                 << glu::declare(intType, outputPrefix + output->name) << ";\n";
245         }
246         else
247             src << "layout(location = " << locationNumber << ") flat out "
248                 << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
249     }
250 
251     src << "\n"
252         << "void main (void)\n"
253         << "{\n"
254         << "    gl_Position = a_position;\n"
255         << "    gl_PointSize = 1.0;\n";
256 
257     // Declare & fetch local input variables
258     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
259     {
260         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
261         {
262             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
263             src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
264         }
265         else
266             src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
267     }
268 
269     // Declare local output variables
270     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
271          ++output)
272     {
273         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
274         {
275             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
276             src << "\t" << tname << " " << output->name << ";\n";
277             const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
278             src << "\t" << tname2 << " "
279                 << "packed_" << output->name << ";\n";
280         }
281         else
282             src << "\t" << glu::declare(output->varType, output->name) << ";\n";
283     }
284 
285     // Operation - indented to correct level.
286     {
287         std::istringstream opSrc(shaderSpec.source);
288         std::string line;
289 
290         while (std::getline(opSrc, line))
291             src << "\t" << line << "\n";
292     }
293 
294     if (shaderSpec.packFloat16Bit)
295         packFloat16Bit(src, shaderSpec.outputs);
296 
297     // Assignments to outputs.
298     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
299          ++output)
300     {
301         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
302         {
303             src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
304         }
305         else
306         {
307             if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
308             {
309                 const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
310                 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
311 
312                 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
313                     << output->name << ");\n";
314             }
315             else
316                 src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
317         }
318     }
319 
320     src << "}\n";
321 
322     return src.str();
323 }
324 
325 struct FragmentOutputLayout
326 {
327     std::vector<const Symbol *> locationSymbols; //! Symbols by location
328     std::map<std::string, int> locationMap;      //! Map from symbol name to start location
329 };
330 
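// Declares the fragment shader color outputs for each output symbol at its assigned
// location: floats are declared as uints when useIntOutputs is set (bit-exact readback),
// bools as ints, and matrices are split into one vector output per column at consecutive
// locations.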
331 static void generateFragShaderOutputDecl(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
332                                          const std::map<std::string, int> &outLocationMap,
333                                          const std::string &outputPrefix)
334 {
335     for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
336     {
337         const Symbol &output         = shaderSpec.outputs[outNdx];
338         const int location           = de::lookup(outLocationMap, output.name);
339         const std::string outVarName = outputPrefix + output.name;
340         glu::VariableDeclaration decl(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST,
341                                       glu::Layout(location));
342 
343         TCU_CHECK_INTERNAL(output.varType.isBasicType());
344 
345         if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
346         {
347             const int vecSize                 = glu::getDataTypeScalarSize(output.varType.getBasicType());
348             const glu::DataType uintBasicType = vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
349             const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
350 
351             decl.varType = uintType;
352             src << decl << ";\n";
353         }
354         else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
355         {
356             const int vecSize                = glu::getDataTypeScalarSize(output.varType.getBasicType());
357             const glu::DataType intBasicType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
358             const glu::VarType intType(intBasicType, glu::PRECISION_HIGHP);
359 
360             decl.varType = intType;
361             src << decl << ";\n";
362         }
363         else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
364         {
365             const int vecSize                 = glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
366             const int numVecs                 = glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
367             const glu::DataType uintBasicType = glu::getDataTypeUintVec(vecSize);
368             const glu::VarType uintType(uintBasicType, glu::PRECISION_HIGHP);
369 
370             decl.varType = uintType;
371             for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
372             {
373                 decl.name            = outVarName + "_" + de::toString(vecNdx);
374                 decl.layout.location = location + vecNdx;
375                 src << decl << ";\n";
376             }
377         }
378         else
379             src << decl << ";\n";
380     }
381 }
382 
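// Emits the assignments from the computed values to the declared fragment outputs, mirroring
// the declarations above: floatBitsToUint for float-to-uint outputs, int casts for bools,
// per-column assignments for matrices, and the "packed_" variables when the results were
// 16-bit packed.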
383 static void generateFragShaderOutAssign(std::ostream &src, const ShaderSpec &shaderSpec, bool useIntOutputs,
384                                         const std::string &valuePrefix, const std::string &outputPrefix,
385                                         const bool isInput16Bit = false)
386 {
387     if (isInput16Bit)
388         packFloat16Bit(src, shaderSpec.outputs);
389 
390     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
391          ++output)
392     {
393         const std::string packPrefix =
394             (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
395 
396         if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
397             src << "    o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
398         else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
399         {
400             const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
401 
402             for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
403                 if (useIntOutputs)
404                     src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix
405                         << output->name << "[" << vecNdx << "]);\n";
406                 else
407                     src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix
408                         << output->name << "[" << vecNdx << "];\n";
409         }
410         else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
411         {
412             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
413             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
414 
415             src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
416                 << valuePrefix << output->name << ");\n";
417         }
418         else
419             src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
420     }
421 }
422 
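// Generates a fragment shader that simply copies the flat inputs produced by an earlier
// stage to the color outputs, using the declaration and assignment helpers above.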
423 static std::string generatePassthroughFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
424                                                      const std::map<std::string, int> &outLocationMap,
425                                                      const std::string &inputPrefix, const std::string &outputPrefix)
426 {
427     std::ostringstream src;
428 
429     src << "#version 450\n";
430 
431     if (!shaderSpec.globalDeclarations.empty())
432         src << shaderSpec.globalDeclarations << "\n";
433 
434     int locationNumber = 0;
435     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
436          ++output, ++locationNumber)
437     {
438         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
439         {
440             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
441             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
442             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
443 
444             src << "layout(location = " << locationNumber << ") flat in "
445                 << glu::declare(intType, inputPrefix + output->name) << ";\n";
446         }
447         else
448             src << "layout(location = " << locationNumber << ") flat in "
449                 << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
450     }
451 
452     generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
453 
454     src << "\nvoid main (void)\n{\n";
455 
456     generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
457 
458     src << "}\n";
459 
460     return src.str();
461 }
462 
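// Generates a point-to-point geometry shader that performs the operation: each input point
// carries the test inputs as flat varyings, shaderSpec.source is executed, and a single
// vertex is emitted with the results as flat outputs.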
463 static std::string generateGeometryShader(const ShaderSpec &shaderSpec, const std::string &inputPrefix,
464                                           const std::string &outputPrefix, const bool pointSizeSupported)
465 {
466     DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());
467 
468     std::ostringstream src;
469 
470     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
471 
472     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
473         src << "#extension GL_EXT_geometry_shader : require\n";
474 
475     if (!shaderSpec.globalDeclarations.empty())
476         src << shaderSpec.globalDeclarations << "\n";
477 
478     src << "layout(points) in;\n"
479         << "layout(points, max_vertices = 1) out;\n";
480 
481     int locationNumber = 0;
482     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
483          ++input, ++locationNumber)
484         src << "layout(location = " << locationNumber << ") flat in "
485             << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";
486 
487     locationNumber = 0;
488     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
489          ++output, ++locationNumber)
490     {
491         DE_ASSERT(output->varType.isBasicType());
492 
493         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
494         {
495             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
496             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
497             const glu::VarType intType(intBaseType, glu::PRECISION_HIGHP);
498 
499             src << "layout(location = " << locationNumber << ") flat out "
500                 << glu::declare(intType, outputPrefix + output->name) << ";\n";
501         }
502         else
503             src << "layout(location = " << locationNumber << ") flat out "
504                 << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
505     }
506 
507     src << "\n"
508         << "void main (void)\n"
509         << "{\n"
510         << "    gl_Position = gl_in[0].gl_Position;\n"
511         << (pointSizeSupported ? "    gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");
512 
513     // Fetch input variables
514     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
515         src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";
516 
517     // Declare local output variables.
518     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
519          ++output)
520         src << "\t" << glu::declare(output->varType, output->name) << ";\n";
521 
522     src << "\n";
523 
524     // Operation - indented to correct level.
525     {
526         std::istringstream opSrc(shaderSpec.source);
527         std::string line;
528 
529         while (std::getline(opSrc, line))
530             src << "\t" << line << "\n";
531     }
532 
533     // Assignments to outputs.
534     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
535          ++output)
536     {
537         if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
538         {
539             const int vecSize               = glu::getDataTypeScalarSize(output->varType.getBasicType());
540             const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
541 
542             src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "("
543                 << output->name << ");\n";
544         }
545         else
546             src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
547     }
548 
549     src << "    EmitVertex();\n"
550         << "    EndPrimitive();\n"
551         << "}\n";
552 
553     return src.str();
554 }
555 
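// Generates a fragment shader that performs the operation: inputs arrive as flat varyings,
// shaderSpec.source is inlined into main(), and results are written to the color outputs
// declared by generateFragShaderOutputDecl().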
556 static std::string generateFragmentShader(const ShaderSpec &shaderSpec, bool useIntOutputs,
557                                           const std::map<std::string, int> &outLocationMap,
558                                           const std::string &inputPrefix, const std::string &outputPrefix)
559 {
560     std::ostringstream src;
561     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
562     if (!shaderSpec.globalDeclarations.empty())
563         src << shaderSpec.globalDeclarations << "\n";
564 
565     int locationNumber = 0;
566     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end();
567          ++input, ++locationNumber)
568     {
569         src << "layout(location = " << locationNumber << ") flat in "
570             << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
571     }
572 
573     generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
574 
575     src << "\nvoid main (void)\n{\n";
576 
577     // Declare & fetch local input variables
578     for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
579     {
580         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
581         {
582             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
583             src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
584         }
585         else
586             src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
587     }
588 
589     // Declare output variables
590     for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end();
591          ++output)
592     {
593         if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
594         {
595             const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
596             src << "\t" << tname << " " << output->name << ";\n";
597             const char *tname2 = glu::getDataTypeName(output->varType.getBasicType());
598             src << "\t" << tname2 << " "
599                 << "packed_" << output->name << ";\n";
600         }
601         else
602             src << "\t" << glu::declare(output->varType, output->name) << ";\n";
603     }
604 
605     // Operation - indented to correct level.
606     {
607         std::istringstream opSrc(shaderSpec.source);
608         std::string line;
609 
610         while (std::getline(opSrc, line))
611             src << "\t" << line << "\n";
612     }
613 
614     generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);
615 
616     src << "}\n";
617 
618     return src.str();
619 }
620 
621 // FragmentOutExecutor
622 
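// Executes the shader spec by rendering one 1x1 point per input value: inputs are bound as
// vertex attributes, the operation runs in the selected shader stage, and results are written
// to one color attachment per output location, which are then read back into the caller's
// output buffers.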
623 class FragmentOutExecutor : public ShaderExecutor
624 {
625 public:
626     FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
627                         VkDescriptorSetLayout extraResourcesLayout);
628     virtual ~FragmentOutExecutor(void);
629 
630     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
631                          VkDescriptorSet extraResources);
632 
633 protected:
634     const glu::ShaderType m_shaderType;
635     const FragmentOutputLayout m_outputLayout;
636 
637 private:
638     void bindAttributes(int numValues, const void *const *inputs);
639 
640     void addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement, uint32_t count,
641                       const void *dataPtr);
642     // reinit render data members
643     virtual void clearRenderData(void);
644 
645     const VkDescriptorSetLayout m_extraResourcesLayout;
646 
647     std::vector<VkVertexInputBindingDescription> m_vertexBindingDescriptions;
648     std::vector<VkVertexInputAttributeDescription> m_vertexAttributeDescriptions;
649     std::vector<VkBufferSp> m_vertexBuffers;
650     std::vector<AllocationSp> m_vertexBufferAllocs;
651 };
652 
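// Assigns each output symbol a starting fragment-output location and records which symbol
// backs each location; multi-location types (matrices) occupy several consecutive locations.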
653 static FragmentOutputLayout computeFragmentOutputLayout(const std::vector<Symbol> &symbols)
654 {
655     FragmentOutputLayout ret;
656     int location = 0;
657 
658     for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
659     {
660         const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
661 
662         TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
663         de::insert(ret.locationMap, it->name, location);
664         location += numLocations;
665 
666         for (int ndx = 0; ndx < numLocations; ++ndx)
667             ret.locationSymbols.push_back(&*it);
668     }
669 
670     return ret;
671 }
672 
673 FragmentOutExecutor::FragmentOutExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
674                                          VkDescriptorSetLayout extraResourcesLayout)
675     : ShaderExecutor(context, shaderSpec)
676     , m_shaderType(shaderType)
677     , m_outputLayout(computeFragmentOutputLayout(m_shaderSpec.outputs))
678     , m_extraResourcesLayout(extraResourcesLayout)
679 {
680     const VkPhysicalDevice physicalDevice = m_context.getPhysicalDevice();
681     const InstanceInterface &vki          = m_context.getInstanceInterface();
682 
683     // Input attributes
684     for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
685     {
686         const Symbol &symbol                      = m_shaderSpec.inputs[inputNdx];
687         const glu::DataType basicType             = symbol.varType.getBasicType();
688         const VkFormat format                     = getAttributeFormat(basicType);
689         const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(vki, physicalDevice, format);
690         if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
691             TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
692     }
693 }
694 
695 FragmentOutExecutor::~FragmentOutExecutor(void)
696 {
697 }
698 
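// Maps each value index to a unique pixel center in normalized device coordinates, so that
// every value is shaded by exactly one fragment.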
699 static std::vector<tcu::Vec2> computeVertexPositions(int numValues, const tcu::IVec2 &renderSize)
700 {
701     std::vector<tcu::Vec2> positions(numValues);
702     for (int valNdx = 0; valNdx < numValues; valNdx++)
703     {
704         const int ix   = valNdx % renderSize.x();
705         const int iy   = valNdx / renderSize.x();
706         const float fx = -1.0f + 2.0f * ((float(ix) + 0.5f) / float(renderSize.x()));
707         const float fy = -1.0f + 2.0f * ((float(iy) + 0.5f) / float(renderSize.y()));
708 
709         positions[valNdx] = tcu::Vec2(fx, fy);
710     }
711 
712     return positions;
713 }
714 
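// Chooses the texture format used to read back an output: 32-bit int/uint for integer and
// bool results, FLOAT/HALF_FLOAT for floating-point results (or UNSIGNED_INT32 when
// useIntOutputs requests a bit-cast readback); three-component types are widened to RGBA
// since no RGB variants are used.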
715 static tcu::TextureFormat getRenderbufferFormatForOutput(const glu::VarType &outputType, bool useIntOutputs)
716 {
717     const tcu::TextureFormat::ChannelOrder channelOrderMap[] = {tcu::TextureFormat::R, tcu::TextureFormat::RG,
718                                                                 tcu::TextureFormat::RGBA, // No RGB variants available.
719                                                                 tcu::TextureFormat::RGBA};
720 
721     const glu::DataType basicType = outputType.getBasicType();
722     const int numComps            = glu::getDataTypeNumComponents(basicType);
723     tcu::TextureFormat::ChannelType channelType;
724 
725     switch (glu::getDataTypeScalarType(basicType))
726     {
727     case glu::TYPE_UINT:
728         channelType = tcu::TextureFormat::UNSIGNED_INT32;
729         break;
730     case glu::TYPE_INT:
731         channelType = tcu::TextureFormat::SIGNED_INT32;
732         break;
733     case glu::TYPE_BOOL:
734         channelType = tcu::TextureFormat::SIGNED_INT32;
735         break;
736     case glu::TYPE_FLOAT:
737         channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;
738         break;
739     case glu::TYPE_FLOAT16:
740         channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;
741         break;
742     default:
743         throw tcu::InternalError("Invalid output type");
744     }
745 
746     DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));
747 
748     return tcu::TextureFormat(channelOrderMap[numComps - 1], channelType);
749 }
750 
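// Maps a GLU data type to the vertex attribute format used to feed it; matrix types return
// the format of a single column, since each column is bound as its own attribute.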
751 static VkFormat getAttributeFormat(const glu::DataType dataType)
752 {
753     switch (dataType)
754     {
755     case glu::TYPE_FLOAT16:
756         return VK_FORMAT_R16_SFLOAT;
757     case glu::TYPE_FLOAT16_VEC2:
758         return VK_FORMAT_R16G16_SFLOAT;
759     case glu::TYPE_FLOAT16_VEC3:
760         return VK_FORMAT_R16G16B16_SFLOAT;
761     case glu::TYPE_FLOAT16_VEC4:
762         return VK_FORMAT_R16G16B16A16_SFLOAT;
763 
764     case glu::TYPE_FLOAT:
765         return VK_FORMAT_R32_SFLOAT;
766     case glu::TYPE_FLOAT_VEC2:
767         return VK_FORMAT_R32G32_SFLOAT;
768     case glu::TYPE_FLOAT_VEC3:
769         return VK_FORMAT_R32G32B32_SFLOAT;
770     case glu::TYPE_FLOAT_VEC4:
771         return VK_FORMAT_R32G32B32A32_SFLOAT;
772 
773     case glu::TYPE_INT:
774         return VK_FORMAT_R32_SINT;
775     case glu::TYPE_INT_VEC2:
776         return VK_FORMAT_R32G32_SINT;
777     case glu::TYPE_INT_VEC3:
778         return VK_FORMAT_R32G32B32_SINT;
779     case glu::TYPE_INT_VEC4:
780         return VK_FORMAT_R32G32B32A32_SINT;
781 
782     case glu::TYPE_UINT:
783         return VK_FORMAT_R32_UINT;
784     case glu::TYPE_UINT_VEC2:
785         return VK_FORMAT_R32G32_UINT;
786     case glu::TYPE_UINT_VEC3:
787         return VK_FORMAT_R32G32B32_UINT;
788     case glu::TYPE_UINT_VEC4:
789         return VK_FORMAT_R32G32B32A32_UINT;
790 
791     case glu::TYPE_FLOAT_MAT2:
792         return VK_FORMAT_R32G32_SFLOAT;
793     case glu::TYPE_FLOAT_MAT2X3:
794         return VK_FORMAT_R32G32B32_SFLOAT;
795     case glu::TYPE_FLOAT_MAT2X4:
796         return VK_FORMAT_R32G32B32A32_SFLOAT;
797     case glu::TYPE_FLOAT_MAT3X2:
798         return VK_FORMAT_R32G32_SFLOAT;
799     case glu::TYPE_FLOAT_MAT3:
800         return VK_FORMAT_R32G32B32_SFLOAT;
801     case glu::TYPE_FLOAT_MAT3X4:
802         return VK_FORMAT_R32G32B32A32_SFLOAT;
803     case glu::TYPE_FLOAT_MAT4X2:
804         return VK_FORMAT_R32G32_SFLOAT;
805     case glu::TYPE_FLOAT_MAT4X3:
806         return VK_FORMAT_R32G32B32_SFLOAT;
807     case glu::TYPE_FLOAT_MAT4:
808         return VK_FORMAT_R32G32B32A32_SFLOAT;
809     default:
810         DE_ASSERT(false);
811         return VK_FORMAT_UNDEFINED;
812     }
813 }
814 
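// Creates a dedicated host-visible vertex buffer for one attribute, uploads and flushes the
// data, and appends the matching binding and attribute descriptions (tightly packed,
// per-vertex rate, offset 0).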
815 void FragmentOutExecutor::addAttribute(uint32_t bindingLocation, VkFormat format, uint32_t sizePerElement,
816                                        uint32_t count, const void *dataPtr)
817 {
818     // Portability requires the stride to be a multiple of minVertexInputBindingStrideAlignment.
819     // This value is usually 4 and current tests meet this requirement, but if that changes
820     // in the future this limit should be verified in checkSupport.
821 #ifndef CTS_USES_VULKANSC
822     if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
823         ((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
824     {
825         DE_FATAL("stride is not a multiple of minVertexInputBindingStrideAlignment");
826     }
827 #endif // CTS_USES_VULKANSC
828 
829     // Add binding specification
830     const uint32_t binding                                   = (uint32_t)m_vertexBindingDescriptions.size();
831     const VkVertexInputBindingDescription bindingDescription = {binding, sizePerElement, VK_VERTEX_INPUT_RATE_VERTEX};
832 
833     m_vertexBindingDescriptions.push_back(bindingDescription);
834 
835     // Add location and format specification
836     const VkVertexInputAttributeDescription attributeDescription = {
837         bindingLocation, // uint32_t location;
838         binding,         // uint32_t binding;
839         format,          // VkFormat format;
840         0u,              // uint32_t offsetInBytes;
841     };
842 
843     m_vertexAttributeDescriptions.push_back(attributeDescription);
844 
845     // Upload data to buffer
846     const VkDevice vkDevice         = m_context.getDevice();
847     const DeviceInterface &vk       = m_context.getDeviceInterface();
848     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
849 
850     const VkDeviceSize inputSize                = sizePerElement * count;
851     const VkBufferCreateInfo vertexBufferParams = {
852         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
853         nullptr,                              // const void* pNext;
854         0u,                                   // VkBufferCreateFlags flags;
855         inputSize,                            // VkDeviceSize size;
856         VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,    // VkBufferUsageFlags usage;
857         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
858         1u,                                   // uint32_t queueFamilyCount;
859         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
860     };
861 
862     Move<VkBuffer> buffer         = createBuffer(vk, vkDevice, &vertexBufferParams);
863     de::MovePtr<Allocation> alloc = m_context.getDefaultAllocator().allocate(
864         getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);
865 
866     VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));
867 
868     deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
869     flushAlloc(vk, vkDevice, *alloc);
870 
871     m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer>>(new Unique<VkBuffer>(buffer)));
872     m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
873 }
874 
875 void FragmentOutExecutor::bindAttributes(int numValues, const void *const *inputs)
876 {
877     // Input attributes
878     for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
879     {
880         const Symbol &symbol          = m_shaderSpec.inputs[inputNdx];
881         const void *ptr               = inputs[inputNdx];
882         const glu::DataType basicType = symbol.varType.getBasicType();
883         const int vecSize             = glu::getDataTypeScalarSize(basicType);
884         const VkFormat format         = getAttributeFormat(basicType);
885         int elementSize               = 0;
886         int numAttrsToAdd             = 1;
887 
888         if (glu::isDataTypeDoubleOrDVec(basicType))
889             elementSize = sizeof(double);
890         if (glu::isDataTypeFloatOrVec(basicType))
891             elementSize = sizeof(float);
892         else if (glu::isDataTypeFloat16OrVec(basicType))
893             elementSize = sizeof(uint16_t);
894         else if (glu::isDataTypeIntOrIVec(basicType))
895             elementSize = sizeof(int);
896         else if (glu::isDataTypeUintOrUVec(basicType))
897             elementSize = sizeof(uint32_t);
898         else if (glu::isDataTypeMatrix(basicType))
899         {
900             int numRows = glu::getDataTypeMatrixNumRows(basicType);
901             int numCols = glu::getDataTypeMatrixNumColumns(basicType);
902 
903             elementSize   = numRows * numCols * (int)sizeof(float);
904             numAttrsToAdd = numCols;
905         }
906         else
907             DE_ASSERT(false);
908 
909         // Add attributes; in the case of a matrix, every column is bound as a separate attribute.
910         for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
911         {
912             addAttribute((uint32_t)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
913         }
914     }
915 }
916 
917 void FragmentOutExecutor::clearRenderData(void)
918 {
919     m_vertexBindingDescriptions.clear();
920     m_vertexAttributeDescriptions.clear();
921     m_vertexBuffers.clear();
922     m_vertexBufferAllocs.clear();
923 }
924 
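// Helpers that create an empty descriptor set layout, a minimal pool, and a single descriptor
// set with no bindings, used by execute() below.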
925 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout(const DeviceInterface &vkd, VkDevice device)
926 {
927     const VkDescriptorSetLayoutCreateInfo createInfo = {
928         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, nullptr, (VkDescriptorSetLayoutCreateFlags)0, 0u, nullptr,
929     };
930     return createDescriptorSetLayout(vkd, device, &createInfo);
931 }
932 
933 static Move<VkDescriptorPool> createEmptyDescriptorPool(const DeviceInterface &vkd, VkDevice device)
934 {
935     const VkDescriptorPoolSize emptySize = {
936         VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
937         1u,
938     };
939     const VkDescriptorPoolCreateInfo createInfo = {
940         VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
941         nullptr,
942         (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
943         1u,
944         1u,
945         &emptySize};
946     return createDescriptorPool(vkd, device, &createInfo);
947 }
948 
949 static Move<VkDescriptorSet> allocateSingleDescriptorSet(const DeviceInterface &vkd, VkDevice device,
950                                                          VkDescriptorPool pool, VkDescriptorSetLayout layout)
951 {
952     const VkDescriptorSetAllocateInfo allocInfo = {
953         VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr, pool, 1u, &layout,
954     };
955     return allocateDescriptorSet(vkd, device, &allocInfo);
956 }
957 
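// Renders numValues 1-pixel points: positions and test inputs are bound as vertex attributes,
// one color attachment is created per output location, and the images are transitioned to
// TRANSFER_SRC after rendering so the results can be copied out.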
958 void FragmentOutExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
959                                   VkDescriptorSet extraResources)
960 {
961     const VkDevice vkDevice         = m_context.getDevice();
962     const DeviceInterface &vk       = m_context.getDeviceInterface();
963     const VkQueue queue             = m_context.getUniversalQueue();
964     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
965     Allocator &memAlloc             = m_context.getDefaultAllocator();
966 
967     const uint32_t renderSizeX = de::min(static_cast<uint32_t>(128), (uint32_t)numValues);
968     const uint32_t renderSizeY =
969         ((uint32_t)numValues / renderSizeX) + (((uint32_t)numValues % renderSizeX != 0) ? 1u : 0u);
970     const tcu::UVec2 renderSize(renderSizeX, renderSizeY);
971     std::vector<tcu::Vec2> positions;
972 
973     const bool useGeometryShader = m_shaderType == glu::SHADERTYPE_GEOMETRY;
974 
975     std::vector<VkImageSp> colorImages;
976     std::vector<VkImageMemoryBarrier> colorImagePreRenderBarriers;
977     std::vector<VkImageMemoryBarrier> colorImagePostRenderBarriers;
978     std::vector<AllocationSp> colorImageAllocs;
979     std::vector<VkAttachmentDescription> attachments;
980     std::vector<VkClearValue> attachmentClearValues;
981     std::vector<VkImageViewSp> colorImageViews;
982 
983     std::vector<VkPipelineColorBlendAttachmentState> colorBlendAttachmentStates;
984     std::vector<VkAttachmentReference> colorAttachmentReferences;
985 
986     Move<VkRenderPass> renderPass;
987     Move<VkFramebuffer> framebuffer;
988     Move<VkPipelineLayout> pipelineLayout;
989     Move<VkPipeline> graphicsPipeline;
990 
991     Move<VkShaderModule> vertexShaderModule;
992     Move<VkShaderModule> geometryShaderModule;
993     Move<VkShaderModule> fragmentShaderModule;
994 
995     Move<VkCommandPool> cmdPool;
996     Move<VkCommandBuffer> cmdBuffer;
997 
998     Unique<VkDescriptorSetLayout> emptyDescriptorSetLayout(createEmptyDescriptorSetLayout(vk, vkDevice));
999     Unique<VkDescriptorPool> emptyDescriptorPool(createEmptyDescriptorPool(vk, vkDevice));
1000     Unique<VkDescriptorSet> emptyDescriptorSet(
1001         allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));
1002 
1003     clearRenderData();
1004 
1005     // Compute positions - 1px points are used to drive fragment shading.
1006     positions = computeVertexPositions(numValues, renderSize.cast<int>());
1007 
1008     // Bind attributes
1009     addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (uint32_t)positions.size(), &positions[0]);
1010     bindAttributes(numValues, inputs);
1011 
1012     // Create color images
1013     {
1014         const VkPipelineColorBlendAttachmentState colorBlendAttachmentState = {
1015             VK_FALSE,             // VkBool32 blendEnable;
1016             VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcColorBlendFactor;
1017             VK_BLEND_FACTOR_ZERO, // VkBlendFactor dstColorBlendFactor;
1018             VK_BLEND_OP_ADD,      // VkBlendOp blendOpColor;
1019             VK_BLEND_FACTOR_ONE,  // VkBlendFactor srcAlphaBlendFactor;
1020             VK_BLEND_FACTOR_ZERO, // VkBlendFactor destAlphaBlendFactor;
1021             VK_BLEND_OP_ADD,      // VkBlendOp blendOpAlpha;
1022             (VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT |
1023              VK_COLOR_COMPONENT_A_BIT) // VkColorComponentFlags colorWriteMask;
1024         };
1025 
1026         for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
1027         {
1028             const bool isDouble   = glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1029             const bool isFloat    = isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1030             const bool isFloat16b = glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1031             const bool isSigned   = isDataTypeIntOrIVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1032             const bool isBool     = isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
1033             const VkFormat colorFormat =
1034                 (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT :
1035                             (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT :
1036                                           (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT :
1037                                                      (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT :
1038                                                                            VK_FORMAT_R32G32B32A32_UINT))));
1039 
1040             {
1041                 const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(
1042                     m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
1043                 if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
1044                     TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
1045             }
1046 
1047             const VkImageCreateInfo colorImageParams = {
1048                 VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                   // VkStructureType sType;
1049                 nullptr,                                                               // const void* pNext;
1050                 0u,                                                                    // VkImageCreateFlags flags;
1051                 VK_IMAGE_TYPE_2D,                                                      // VkImageType imageType;
1052                 colorFormat,                                                           // VkFormat format;
1053                 {renderSize.x(), renderSize.y(), 1u},                                  // VkExtent3D extent;
1054                 1u,                                                                    // uint32_t mipLevels;
1055                 1u,                                                                    // uint32_t arraySize;
1056                 VK_SAMPLE_COUNT_1_BIT,                                                 // VkSampleCountFlagBits samples;
1057                 VK_IMAGE_TILING_OPTIMAL,                                               // VkImageTiling tiling;
1058                 VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
1059                 VK_SHARING_MODE_EXCLUSIVE,                                             // VkSharingMode sharingMode;
1060                 1u,                                                                    // uint32_t queueFamilyCount;
1061                 &queueFamilyIndex,         // const uint32_t* pQueueFamilyIndices;
1062                 VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
1063             };
1064 
1065             const VkAttachmentDescription colorAttachmentDescription = {
1066                 0u,                                       // VkAttachmentDescriptionFlags flags;
1067                 colorFormat,                              // VkFormat format;
1068                 VK_SAMPLE_COUNT_1_BIT,                    // VkSampleCountFlagBits samples;
1069                 VK_ATTACHMENT_LOAD_OP_CLEAR,              // VkAttachmentLoadOp loadOp;
1070                 VK_ATTACHMENT_STORE_OP_STORE,             // VkAttachmentStoreOp storeOp;
1071                 VK_ATTACHMENT_LOAD_OP_DONT_CARE,          // VkAttachmentLoadOp stencilLoadOp;
1072                 VK_ATTACHMENT_STORE_OP_DONT_CARE,         // VkAttachmentStoreOp stencilStoreOp;
1073                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout initialLayout;
1074                 VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // VkImageLayout finalLayout;
1075             };
1076 
1077             Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
1078             colorImages.push_back(de::SharedPtr<Unique<VkImage>>(new Unique<VkImage>(colorImage)));
1079             attachmentClearValues.push_back(getDefaultClearColor());
1080 
1081             // Allocate and bind color image memory
1082             {
1083                 de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(
1084                     getImageMemoryRequirements(vk, vkDevice, *((const VkImage *)colorImages.back().get())),
1085                     MemoryRequirement::Any);
1086                 VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(),
1087                                             colorImageAlloc->getOffset()));
1088                 colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));
1089 
1090                 attachments.push_back(colorAttachmentDescription);
1091                 colorBlendAttachmentStates.push_back(colorBlendAttachmentState);
1092 
1093                 const VkAttachmentReference colorAttachmentReference = {
1094                     (uint32_t)(colorImages.size() - 1),      // uint32_t attachment;
1095                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
1096                 };
1097 
1098                 colorAttachmentReferences.push_back(colorAttachmentReference);
1099             }
1100 
1101             // Create color attachment view
1102             {
1103                 const VkImageViewCreateInfo colorImageViewParams = {
1104                     VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
1105                     nullptr,                                  // const void* pNext;
1106                     0u,                                       // VkImageViewCreateFlags flags;
1107                     colorImages.back().get()->get(),          // VkImage image;
1108                     VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
1109                     colorFormat,                              // VkFormat format;
1110                     {
1111                         VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
1112                         VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
1113                         VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
1114                         VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
1115                     },                          // VkComponentMapping components;
1116                     {
1117                         VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
1118                         0u,                        // uint32_t baseMipLevel;
1119                         1u,                        // uint32_t mipLevels;
1120                         0u,                        // uint32_t baseArraySlice;
1121                         1u                         // uint32_t arraySize;
1122                     }                              // VkImageSubresourceRange subresourceRange;
1123                 };
1124 
1125                 Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
1126                 colorImageViews.push_back(de::SharedPtr<Unique<VkImageView>>(new Unique<VkImageView>(colorImageView)));
1127 
1128                 const VkImageMemoryBarrier colorImagePreRenderBarrier = {
1129                     VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                       // sType
1130                     nullptr,                                                                      // pNext
1131                     0u,                                                                           // srcAccessMask
1132                     (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // dstAccessMask
1133                     VK_IMAGE_LAYOUT_UNDEFINED,                                                    // oldLayout
1134                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                     // newLayout
1135                     VK_QUEUE_FAMILY_IGNORED,                                                      // srcQueueFamilyIndex
1136                     VK_QUEUE_FAMILY_IGNORED,                                                      // dstQueueFamilyIndex
1137                     colorImages.back().get()->get(),                                              // image
1138                     {
1139                         VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1140                         0u,                        // baseMipLevel
1141                         1u,                        // levelCount
1142                         0u,                        // baseArrayLayer
1143                         1u,                        // layerCount
1144                     }                              // subresourceRange
1145                 };
1146                 colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);
1147 
1148                 const VkImageMemoryBarrier colorImagePostRenderBarrier = {
1149                     VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,                                       // sType
1150                     nullptr,                                                                      // pNext
1151                     (VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT), // srcAccessMask
1152                     VK_ACCESS_TRANSFER_READ_BIT,                                                  // dstAccessMask
1153                     VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,                                     // oldLayout
1154                     VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,                                         // newLayout
1155                     VK_QUEUE_FAMILY_IGNORED,                                                      // srcQueueFamilyIndex
1156                     VK_QUEUE_FAMILY_IGNORED,                                                      // dstQueueFamilyIndex
1157                     colorImages.back().get()->get(),                                              // image
1158                     {
1159                         VK_IMAGE_ASPECT_COLOR_BIT, // aspectMask
1160                         0u,                        // baseMipLevel
1161                         1u,                        // levelCount
1162                         0u,                        // baseArrayLayer
1163                         1u,                        // layerCount
1164                     }                              // subresourceRange
1165                 };
1166                 colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
1167             }
1168         }
1169     }
1170 
1171     // Create render pass
1172     {
1173         const VkSubpassDescription subpassDescription = {
1174             0u,                              // VkSubpassDescriptionFlags flags;
1175             VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
1176             0u,                              // uint32_t inputAttachmentCount;
1177             nullptr,                         // const VkAttachmentReference* pInputAttachments;
1178             (uint32_t)colorImages.size(),    // uint32_t colorAttachmentCount;
1179             &colorAttachmentReferences[0],   // const VkAttachmentReference* pColorAttachments;
1180             nullptr,                         // const VkAttachmentReference* pResolveAttachments;
1181             nullptr,                         // const VkAttachmentReference* pDepthStencilAttachment;
1182             0u,                              // uint32_t preserveAttachmentCount;
1183             nullptr                          // const uint32_t* pPreserveAttachments;
1184         };
1185 
1186         const VkRenderPassCreateInfo renderPassParams = {
1187             VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
1188             nullptr,                                   // const void* pNext;
1189             (VkRenderPassCreateFlags)0,                // VkRenderPassCreateFlags flags;
1190             (uint32_t)attachments.size(),              // uint32_t attachmentCount;
1191             &attachments[0],                           // const VkAttachmentDescription* pAttachments;
1192             1u,                                        // uint32_t subpassCount;
1193             &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
1194             0u,                                        // uint32_t dependencyCount;
1195             nullptr                                    // const VkSubpassDependency* pDependencies;
1196         };
1197 
1198         renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
1199     }
1200 
1201     // Create framebuffer
1202     {
1203         std::vector<VkImageView> views(colorImageViews.size());
1204         for (size_t i = 0; i < colorImageViews.size(); i++)
1205         {
1206             views[i] = colorImageViews[i].get()->get();
1207         }
1208 
1209         const VkFramebufferCreateInfo framebufferParams = {
1210             VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
1211             nullptr,                                   // const void* pNext;
1212             0u,                                        // VkFramebufferCreateFlags flags;
1213             *renderPass,                               // VkRenderPass renderPass;
1214             (uint32_t)views.size(),                    // uint32_t attachmentCount;
1215             &views[0],                                 // const VkImageView* pAttachments;
1216             (uint32_t)renderSize.x(),                  // uint32_t width;
1217             (uint32_t)renderSize.y(),                  // uint32_t height;
1218             1u                                         // uint32_t layers;
1219         };
1220 
1221         framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
1222     }
1223 
1224     // Create pipeline layout
1225     {
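        // Set 0 is an empty placeholder layout; the optional extra resources are bound as set 1 below.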
1226         const VkDescriptorSetLayout setLayouts[]              = {*emptyDescriptorSetLayout, m_extraResourcesLayout};
1227         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
1228             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,        // VkStructureType sType;
1229             nullptr,                                              // const void* pNext;
1230             (VkPipelineLayoutCreateFlags)0,                       // VkPipelineLayoutCreateFlags flags;
1231             (m_extraResourcesLayout != VK_NULL_HANDLE ? 2u : 0u), // uint32_t setLayoutCount;
1232             setLayouts,                                           // const VkDescriptorSetLayout* pSetLayouts;
1233             0u,                                                   // uint32_t pushConstantRangeCount;
1234             nullptr                                               // const VkPushConstantRange* pPushConstantRanges;
1235         };
1236 
1237         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
1238     }
1239 
1240     // Create shaders
1241     {
1242         vertexShaderModule   = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
1243         fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
1244 
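        // Use the gl_PointSize-writing geometry shader variant only when the device supports writing point size from geometry shaders.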
1245         if (useGeometryShader)
1246         {
1247             if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
1248                 geometryShaderModule =
1249                     createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
1250             else
1251                 geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
1252         }
1253     }
1254 
1255     // Create pipeline
1256     {
1257         const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
1258             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
1259             nullptr,                                                   // const void* pNext;
1260             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
1261             (uint32_t)m_vertexBindingDescriptions.size(),              // uint32_t vertexBindingDescriptionCount;
1262             &m_vertexBindingDescriptions[0], // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
1263             (uint32_t)m_vertexAttributeDescriptions.size(), // uint32_t vertexAttributeDescriptionCount;
1264             &m_vertexAttributeDescriptions[0], // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
1265         };
1266 
1267         const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
1268         const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));
1269 
1270         const VkPipelineColorBlendStateCreateInfo colorBlendStateParams = {
1271             VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, // VkStructureType sType;
1272             nullptr,                                                  // const void* pNext;
1273             (VkPipelineColorBlendStateCreateFlags)0,                  // VkPipelineColorBlendStateCreateFlags flags;
1274             VK_FALSE,                                                 // VkBool32 logicOpEnable;
1275             VK_LOGIC_OP_COPY,                                         // VkLogicOp logicOp;
1276             (uint32_t)colorBlendAttachmentStates.size(),              // uint32_t attachmentCount;
1277             &colorBlendAttachmentStates[0], // const VkPipelineColorBlendAttachmentState* pAttachments;
1278             {0.0f, 0.0f, 0.0f, 0.0f}        // float blendConstants[4];
1279         };
1280 
1281         graphicsPipeline = makeGraphicsPipeline(
1282             vk,                  // const DeviceInterface&                        vk
1283             vkDevice,            // const VkDevice                                device
1284             *pipelineLayout,     // const VkPipelineLayout                        pipelineLayout
1285             *vertexShaderModule, // const VkShaderModule                          vertexShaderModule
1286             VK_NULL_HANDLE,      // const VkShaderModule                          tessellationControlShaderModule
1287             VK_NULL_HANDLE,      // const VkShaderModule                          tessellationEvalShaderModule
1288             useGeometryShader ? *geometryShaderModule :
1289                                 VK_NULL_HANDLE, // const VkShaderModule                          geometryShaderModule
1290             *fragmentShaderModule,              // const VkShaderModule                          fragmentShaderModule
1291             *renderPass,                        // const VkRenderPass                            renderPass
1292             viewports,                          // const std::vector<VkViewport>&                viewports
1293             scissors,                           // const std::vector<VkRect2D>&                  scissors
1294             VK_PRIMITIVE_TOPOLOGY_POINT_LIST,   // const VkPrimitiveTopology                     topology
1295             0u,                                 // const uint32_t                                subpass
1296             0u,                                 // const uint32_t                                patchControlPoints
1297             &vertexInputStateParams, // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
1298             nullptr,                 // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
1299             nullptr,                 // const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
1300             nullptr,                 // const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
1301             &colorBlendStateParams); // const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
1302     }
1303 
1304     // Create command pool
1305     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1306 
1307     // Create command buffer
1308     {
1309         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1310 
1311         beginCommandBuffer(vk, *cmdBuffer);
1312 
1313         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
1314                               vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0, nullptr, 0,
1315                               nullptr, (uint32_t)colorImagePreRenderBarriers.size(),
1316                               colorImagePreRenderBarriers.empty() ? nullptr : &colorImagePreRenderBarriers[0]);
1317         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
1318                         (uint32_t)attachmentClearValues.size(), &attachmentClearValues[0]);
1319 
1320         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
1321 
1322         if (m_extraResourcesLayout != VK_NULL_HANDLE)
1323         {
1324             DE_ASSERT(extraResources != VK_NULL_HANDLE);
1325             const VkDescriptorSet descriptorSets[] = {*emptyDescriptorSet, extraResources};
1326             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
1327                                      DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, nullptr);
1328         }
1329         else
1330             DE_ASSERT(extraResources == VK_NULL_HANDLE);
1331 
1332         const uint32_t numberOfVertexAttributes = (uint32_t)m_vertexBuffers.size();
1333 
1334         std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);
1335 
1336         std::vector<VkBuffer> buffers(numberOfVertexAttributes);
1337         for (size_t i = 0; i < numberOfVertexAttributes; i++)
1338         {
1339             buffers[i] = m_vertexBuffers[i].get()->get();
1340         }
1341 
1342         vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
1343         vk.cmdDraw(*cmdBuffer, (uint32_t)positions.size(), 1u, 0u, 0u);
1344 
1345         endRenderPass(vk, *cmdBuffer);
1346         vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
1347                               vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0, nullptr, 0, nullptr,
1348                               (uint32_t)colorImagePostRenderBarriers.size(),
1349                               colorImagePostRenderBarriers.empty() ? nullptr : &colorImagePostRenderBarriers[0]);
1350 
1351         endCommandBuffer(vk, *cmdBuffer);
1352     }
1353 
1354     // Execute Draw
1355     submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
1356 
1357     // Read back the rendered results into the output arrays
1358     {
1359         const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(uint32_t) * renderSize.x() * renderSize.y());
1360         const VkBufferCreateInfo readImageBufferParams = {
1361             VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1362             nullptr,                              // const void* pNext;
1363             0u,                                   // VkBufferCreateFlags flags;
1364             imageSizeBytes,                       // VkDeviceSize size;
1365             VK_BUFFER_USAGE_TRANSFER_DST_BIT,     // VkBufferUsageFlags usage;
1366             VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
1367             1u,                                   // uint32_t queueFamilyIndexCount;
1368             &queueFamilyIndex,                    // const uint32_t* pQueueFamilyIndices;
1369         };
1370 
1371         // Command pool and constant copy parameters shared by the image-to-buffer copies below
1372         Move<VkCommandPool> copyCmdPool =
1373             createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
1374 
1375         const VkBufferImageCopy copyParams = {
1376             0u,                       // VkDeviceSize bufferOffset;
1377             (uint32_t)renderSize.x(), // uint32_t bufferRowLength;
1378             (uint32_t)renderSize.y(), // uint32_t bufferImageHeight;
1379             {
1380                 VK_IMAGE_ASPECT_COLOR_BIT,       // VkImageAspectFlags aspectMask;
1381                 0u,                              // uint32_t mipLevel;
1382                 0u,                              // uint32_t baseArrayLayer;
1383                 1u,                              // uint32_t layerCount;
1384             },                                   // VkImageSubresourceLayers imageSubresource;
1385             {0u, 0u, 0u},                        // VkOffset3D imageOffset;
1386             {renderSize.x(), renderSize.y(), 1u} // VkExtent3D imageExtent;
1387         };
1388 
1389         // Read back pixels.
1390         for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
1391         {
1392             const Symbol &output  = m_shaderSpec.outputs[outNdx];
1393             const int outSize     = output.varType.getScalarSize();
1394             const int outVecSize  = glu::getDataTypeNumComponents(output.varType.getBasicType());
1395             const int outNumLocs  = glu::getDataTypeNumLocations(output.varType.getBasicType());
1396             const int outLocation = de::lookup(m_outputLayout.locationMap, output.name);
1397 
1398             for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
1399             {
1400                 tcu::TextureLevel tmpBuf;
1401                 const tcu::TextureFormat format = getRenderbufferFormatForOutput(output.varType, false);
1402                 const tcu::TextureFormat readFormat(tcu::TextureFormat::RGBA, format.type);
1403                 const Unique<VkBuffer> readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
1404                 const de::UniquePtr<Allocation> readImageBufferMemory(memAlloc.allocate(
1405                     getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));
1406 
1407                 VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(),
1408                                              readImageBufferMemory->getOffset()));
1409 
1410                 // Copy image to buffer
1411                 {
1412 
1413                     Move<VkCommandBuffer> copyCmdBuffer =
1414                         allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
1415 
1416                     beginCommandBuffer(vk, *copyCmdBuffer);
1417                     vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(),
1418                                             VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);
1419 
1420                     // Insert a barrier so data written by the transfer is available to the host
1421                     {
1422                         const VkBufferMemoryBarrier barrier = {
1423                             VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
1424                             nullptr,                                 // const void*        pNext;
1425                             VK_ACCESS_TRANSFER_WRITE_BIT,            // VkAccessFlags      srcAccessMask;
1426                             VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
1427                             VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
1428                             VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
1429                             *readImageBuffer,                        // VkBuffer           buffer;
1430                             0,                                       // VkDeviceSize       offset;
1431                             VK_WHOLE_SIZE,                           // VkDeviceSize       size;
1432                         };
1433 
1434                         vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT,
1435                                               vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, nullptr, 1,
1436                                               &barrier, 0, nullptr);
1437                     }
1438 
1439                     endCommandBuffer(vk, *copyCmdBuffer);
1440 
1441                     submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
1442                 }
1443 
1444                 invalidateAlloc(vk, vkDevice, *readImageBufferMemory);
1445 
1446                 tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());
1447 
1448                 const tcu::TextureFormat resultFormat(tcu::TextureFormat::RGBA, format.type);
1449                 const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1,
1450                                                                readImageBufferMemory->getHostPtr());
1451 
1452                 tcu::copy(tmpBuf.getAccess(), resultAccess);
1453 
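                // Unpack the read-back pixels into the client output array: each value occupies one RGBA
                // pixel, and only the first outVecSize channels carry data for this output location.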
1454                 if (isOutput16Bit(static_cast<size_t>(outNdx)))
1455                 {
1456                     uint16_t *dstPtrBase = static_cast<uint16_t *>(outputs[outNdx]);
1457                     if (outSize == 4 && outNumLocs == 1)
1458                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1459                                  numValues * outVecSize * sizeof(uint16_t));
1460                     else
1461                     {
1462                         for (int valNdx = 0; valNdx < numValues; valNdx++)
1463                         {
1464                             const uint16_t *srcPtr = (const uint16_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1465                             uint16_t *dstPtr       = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1466                             deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint16_t));
1467                         }
1468                     }
1469                 }
1470                 else
1471                 {
1472                     uint32_t *dstPtrBase = static_cast<uint32_t *>(outputs[outNdx]);
1473                     if (outSize == 4 && outNumLocs == 1)
1474                         deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(),
1475                                  numValues * outVecSize * sizeof(uint32_t));
1476                     else
1477                     {
1478                         for (int valNdx = 0; valNdx < numValues; valNdx++)
1479                         {
1480                             const uint32_t *srcPtr = (const uint32_t *)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
1481                             uint32_t *dstPtr       = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
1482                             deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(uint32_t));
1483                         }
1484                     }
1485                 }
1486             }
1487         }
1488     }
1489 }
1490 
1491 // VertexShaderExecutor
1492 
1493 class VertexShaderExecutor : public FragmentOutExecutor
1494 {
1495 public:
1496     VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1497     virtual ~VertexShaderExecutor(void);
1498 
1499     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &dst);
1500 };
1501 
1502 VertexShaderExecutor::VertexShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1503                                            VkDescriptorSetLayout extraResourcesLayout)
1504     : FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
1505 {
1506 }
1507 
1508 VertexShaderExecutor::~VertexShaderExecutor(void)
1509 {
1510 }
1511 
1512 void VertexShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1513 {
1514     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1515 
1516     programCollection.glslSources.add("vert")
1517         << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1518     /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1519     programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1520                                                      shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1521                                               << shaderSpec.buildOptions;
1522 }
1523 
1524 // GeometryShaderExecutor
1525 
1526 class GeometryShaderExecutor : public FragmentOutExecutor
1527 {
1528 public:
1529     GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1530     virtual ~GeometryShaderExecutor(void);
1531 
1532     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1533 };
1534 
1535 GeometryShaderExecutor::GeometryShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1536                                                VkDescriptorSetLayout extraResourcesLayout)
1537     : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1538 {
1539     const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
1540 
1541     if (!features.geometryShader)
1542         TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1543 }
1544 
1545 GeometryShaderExecutor::~GeometryShaderExecutor(void)
1546 {
1547 }
1548 
1549 void GeometryShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1550 {
1551     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1552 
1553     programCollection.glslSources.add("vert")
1554         << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1555 
1556     programCollection.glslSources.add("geom")
1557         << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false))
1558         << shaderSpec.buildOptions;
1559     programCollection.glslSources.add("geom_point_size")
1560         << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true))
1561         << shaderSpec.buildOptions;
1562 
1563     /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1564     programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(
1565                                                      shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_"))
1566                                               << shaderSpec.buildOptions;
1567 }
1568 
1569 // FragmentShaderExecutor
1570 
1571 class FragmentShaderExecutor : public FragmentOutExecutor
1572 {
1573 public:
1574     FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
1575     virtual ~FragmentShaderExecutor(void);
1576 
1577     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
1578 };
1579 
1580 FragmentShaderExecutor::FragmentShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
1581                                                VkDescriptorSetLayout extraResourcesLayout)
1582     : FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
1583 {
1584 }
1585 
1586 FragmentShaderExecutor::~FragmentShaderExecutor(void)
1587 {
1588 }
1589 
1590 void FragmentShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
1591 {
1592     const FragmentOutputLayout outputLayout(computeFragmentOutputLayout(shaderSpec.outputs));
1593 
1594     programCollection.glslSources.add("vert")
1595         << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1596     /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1597     programCollection.glslSources.add("frag")
1598         << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_"))
1599         << shaderSpec.buildOptions;
1600 }
1601 
1602 // Shared utilities for compute and tess executors
1603 
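// Returns the std430 base alignment for a scalar or vector type: a scalar aligns to its component
// size, a 2-component vector to twice that, and 3- and 4-component vectors to four times that.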
1604 static uint32_t getVecStd430ByteAlignment(glu::DataType type)
1605 {
1606     uint32_t baseSize;
1607 
1608     switch (glu::getDataTypeScalarType(type))
1609     {
1610     case glu::TYPE_FLOAT16:
1611         baseSize = 2u;
1612         break;
1613     case glu::TYPE_DOUBLE:
1614         baseSize = 8u;
1615         break;
1616     default:
1617         baseSize = 4u;
1618         break;
1619     }
1620 
1621     switch (glu::getDataTypeScalarSize(type))
1622     {
1623     case 1:
1624         return baseSize;
1625     case 2:
1626         return baseSize * 2u;
1627     case 3: // fallthrough.
1628     case 4:
1629         return baseSize * 4u;
1630     default:
1631         DE_ASSERT(false);
1632         return 0u;
1633     }
1634 }
1635 
1636 class BufferIoExecutor : public ShaderExecutor
1637 {
1638 public:
1639     BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec);
1640     virtual ~BufferIoExecutor(void);
1641 
1642 protected:
1643     enum
1644     {
1645         INPUT_BUFFER_BINDING  = 0,
1646         OUTPUT_BUFFER_BINDING = 1,
1647     };
1648 
1649     void initBuffers(int numValues);
1650     VkBuffer getInputBuffer(void) const
1651     {
1652         return *m_inputBuffer;
1653     }
1654     VkBuffer getOutputBuffer(void) const
1655     {
1656         return *m_outputBuffer;
1657     }
1658     uint32_t getInputStride(void) const
1659     {
1660         return getLayoutStride(m_inputLayout);
1661     }
1662     uint32_t getOutputStride(void) const
1663     {
1664         return getLayoutStride(m_outputLayout);
1665     }
1666 
1667     void uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit);
1668     void readOutputBuffer(void *const *outputPtrs, int numValues);
1669 
1670     static void declareBufferBlocks(std::ostream &src, const ShaderSpec &spec);
1671     static void generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName);
1672 
1673 protected:
1674     Move<VkBuffer> m_inputBuffer;
1675     Move<VkBuffer> m_outputBuffer;
1676 
1677 private:
1678     struct VarLayout
1679     {
1680         uint32_t offset;
1681         uint32_t stride;
1682         uint32_t matrixStride;
1683 
1684         VarLayout(void) : offset(0), stride(0), matrixStride(0)
1685         {
1686         }
1687     };
1688 
1689     static void computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout);
1690     static uint32_t getLayoutStride(const vector<VarLayout> &layout);
1691 
1692     static void copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1693                              const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit);
1694     static void copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1695                                const void *srcBasePtr, void *dstBasePtr);
1696 
1697     de::MovePtr<Allocation> m_inputAlloc;
1698     de::MovePtr<Allocation> m_outputAlloc;
1699 
1700     vector<VarLayout> m_inputLayout;
1701     vector<VarLayout> m_outputLayout;
1702 };
1703 
1704 BufferIoExecutor::BufferIoExecutor(Context &context, const ShaderSpec &shaderSpec) : ShaderExecutor(context, shaderSpec)
1705 {
1706     computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
1707     computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
1708 }
1709 
1710 BufferIoExecutor::~BufferIoExecutor(void)
1711 {
1712 }
1713 
1714 inline uint32_t BufferIoExecutor::getLayoutStride(const vector<VarLayout> &layout)
1715 {
1716     return layout.empty() ? 0 : layout[0].stride;
1717 }
1718 
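// Computes an std430-style layout for the given symbols: each variable gets an aligned offset within
// a per-invocation struct, and every entry's stride is set to the total aligned struct size so
// consecutive invocations are laid out as an array of these structs.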
1719 void BufferIoExecutor::computeVarLayout(const std::vector<Symbol> &symbols, std::vector<VarLayout> *layout)
1720 {
1721     uint32_t maxAlignment = 0;
1722     uint32_t curOffset    = 0;
1723 
1724     DE_ASSERT(layout != nullptr);
1725     DE_ASSERT(layout->empty());
1726     layout->resize(symbols.size());
1727 
1728     for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
1729     {
1730         const Symbol &symbol          = symbols[varNdx];
1731         const glu::DataType basicType = symbol.varType.getBasicType();
1732         VarLayout &layoutEntry        = (*layout)[varNdx];
1733 
1734         if (glu::isDataTypeScalarOrVector(basicType))
1735         {
1736             const uint32_t alignment = getVecStd430ByteAlignment(basicType);
1737             const uint32_t size =
1738                 (uint32_t)glu::getDataTypeScalarSize(basicType) *
1739                 (isDataTypeDoubleType(basicType) ?
1740                      (int)(sizeof(uint64_t)) :
1741                      (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1742 
1743             curOffset    = (uint32_t)deAlign32((int)curOffset, (int)alignment);
1744             maxAlignment = de::max(maxAlignment, alignment);
1745 
1746             layoutEntry.offset       = curOffset;
1747             layoutEntry.matrixStride = 0;
1748 
1749             curOffset += size;
1750         }
1751         else if (glu::isDataTypeMatrix(basicType))
1752         {
1753             const int numVecs = glu::getDataTypeMatrixNumColumns(basicType);
1754             const glu::DataType vecType =
1755                 glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
1756             const uint32_t vecAlignment = getVecStd430ByteAlignment(vecType);
1757 
1758             curOffset    = (uint32_t)deAlign32((int)curOffset, (int)vecAlignment);
1759             maxAlignment = de::max(maxAlignment, vecAlignment);
1760 
1761             layoutEntry.offset       = curOffset;
1762             layoutEntry.matrixStride = vecAlignment;
1763 
1764             curOffset += vecAlignment * numVecs;
1765         }
1766         else
1767             DE_ASSERT(false);
1768     }
1769 
1770     {
1771         const uint32_t totalSize = (uint32_t)deAlign32(curOffset, maxAlignment);
1772 
1773         for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
1774             varIter->stride = totalSize;
1775     }
1776 }
1777 
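// Emits the GLSL "Inputs"/"Outputs" struct declarations and the std430 SSBO blocks that hold one
// struct per invocation, bound at INPUT_BUFFER_BINDING and OUTPUT_BUFFER_BINDING of set 0.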
1778 void BufferIoExecutor::declareBufferBlocks(std::ostream &src, const ShaderSpec &spec)
1779 {
1780     // Input struct
1781     if (!spec.inputs.empty())
1782     {
1783         glu::StructType inputStruct("Inputs");
1784         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1785             inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1786         src << glu::declare(&inputStruct) << ";\n";
1787     }
1788 
1789     // Output struct
1790     {
1791         glu::StructType outputStruct("Outputs");
1792         for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1793             outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1794         src << glu::declare(&outputStruct) << ";\n";
1795     }
1796 
1797     src << "\n";
1798 
1799     if (!spec.inputs.empty())
1800     {
1801         src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1802             << "{\n"
1803             << "    Inputs inputs[];\n"
1804             << "};\n";
1805     }
1806 
1807     src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1808         << "{\n"
1809         << "    Outputs outputs[];\n"
1810         << "};\n"
1811         << "\n";
1812 }
1813 
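// Emits the per-invocation GLSL body: local copies of the inputs are loaded from inputs[invocationNdx],
// the user-provided shader source is pasted in, and the results (packed to 16 bits when requested)
// are written back to outputs[invocationNdx].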
1814 void BufferIoExecutor::generateExecBufferIo(std::ostream &src, const ShaderSpec &spec, const char *invocationNdxName)
1815 {
1816     std::string tname;
1817     for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1818     {
1819         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1820         if (f16BitTest)
1821         {
1822             tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1823         }
1824         else
1825         {
1826             tname = glu::getDataTypeName(symIter->varType.getBasicType());
1827         }
1828         src << "\t" << tname << " " << symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]."
1829             << symIter->name << ");\n";
1830     }
1831 
1832     for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1833     {
1834         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1835         if (f16BitTest)
1836         {
1837             tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1838         }
1839         else
1840         {
1841             tname = glu::getDataTypeName(symIter->varType.getBasicType());
1842         }
1843         src << "\t" << tname << " " << symIter->name << ";\n";
1844         if (f16BitTest)
1845         {
1846             const char *ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1847             src << "\t" << ttname << " "
1848                 << "packed_" << symIter->name << ";\n";
1849         }
1850     }
1851 
1852     src << "\n";
1853 
1854     {
1855         std::istringstream opSrc(spec.source);
1856         std::string line;
1857 
1858         while (std::getline(opSrc, line))
1859             src << "\t" << line << "\n";
1860     }
1861 
1862     if (spec.packFloat16Bit)
1863         packFloat16Bit(src, spec.outputs);
1864 
1865     src << "\n";
1866     for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1867     {
1868         const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1869         if (f16BitTest)
1870             src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1871         else
1872             src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1873     }
1874 }
1875 
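// Scatters tightly packed client-side values into the std430 buffer layout; when packFloat16Bit is
// set, each 32-bit float is converted to float16 and stored in the low 16 bits of its 32-bit slot.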
1876 void BufferIoExecutor::copyToBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1877                                     const void *srcBasePtr, void *dstBasePtr, bool packFloat16Bit)
1878 {
1879     if (varType.isBasicType())
1880     {
1881         const glu::DataType basicType = varType.getBasicType();
1882         const bool isMatrix           = glu::isDataTypeMatrix(basicType);
1883         const int scalarSize          = glu::getDataTypeScalarSize(basicType);
1884         const int numVecs             = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1885         const int numComps            = scalarSize / numVecs;
1886         const int size                = (glu::isDataTypeDoubleType(basicType) ?
1887                                              (int)sizeof(uint64_t) :
1888                                              (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1889 
1890         for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1891         {
1892             for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1893             {
1894                 const int srcOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1895                 const int dstOffset =
1896                     layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1897                 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1898                 uint8_t *dstPtr       = (uint8_t *)dstBasePtr + dstOffset;
1899 
1900                 if (packFloat16Bit)
1901                 {
1902                     // Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
1903                     for (int cmpNdx = 0; cmpNdx < numComps; ++cmpNdx)
1904                     {
1905                         deFloat16 f16vals[2] = {};
1906                         f16vals[0]           = deFloat32To16Round(((float *)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
1907                         deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
1908                     }
1909                 }
1910                 else
1911                 {
1912                     deMemcpy(dstPtr, srcPtr, size * numComps);
1913                 }
1914             }
1915         }
1916     }
1917     else
1918         throw tcu::InternalError("Unsupported type");
1919 }
1920 
1921 void BufferIoExecutor::copyFromBuffer(const glu::VarType &varType, const VarLayout &layout, int numValues,
1922                                       const void *srcBasePtr, void *dstBasePtr)
1923 {
1924     if (varType.isBasicType())
1925     {
1926         const glu::DataType basicType = varType.getBasicType();
1927         const bool isMatrix           = glu::isDataTypeMatrix(basicType);
1928         const int scalarSize          = glu::getDataTypeScalarSize(basicType);
1929         const int numVecs             = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1930         const int numComps            = scalarSize / numVecs;
1931 
1932         for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1933         {
1934             for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1935             {
1936                 const int size =
1937                     (glu::isDataTypeDoubleType(basicType) ?
1938                          (int)sizeof(uint64_t) :
1939                          (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(uint16_t) : (int)sizeof(uint32_t)));
1940                 const int srcOffset =
1941                     layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1942                 const int dstOffset   = size * (elemNdx * scalarSize + vecNdx * numComps);
1943                 const uint8_t *srcPtr = (const uint8_t *)srcBasePtr + srcOffset;
1944                 uint8_t *dstPtr       = (uint8_t *)dstBasePtr + dstOffset;
1945 
1946                 deMemcpy(dstPtr, srcPtr, size * numComps);
1947             }
1948         }
1949     }
1950     else
1951         throw tcu::InternalError("Unsupported type");
1952 }
1953 
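// Copies all user-provided input values into the host-visible input buffer using the computed std430
// layout, then flushes the allocation so the device sees the data.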
1954 void BufferIoExecutor::uploadInputBuffer(const void *const *inputPtrs, int numValues, bool packFloat16Bit)
1955 {
1956     const VkDevice vkDevice   = m_context.getDevice();
1957     const DeviceInterface &vk = m_context.getDeviceInterface();
1958 
1959     const uint32_t inputStride = getLayoutStride(m_inputLayout);
1960     const int inputBufferSize  = inputStride * numValues;
1961 
1962     if (inputBufferSize == 0)
1963         return; // No inputs
1964 
1965     DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1966     for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1967     {
1968         const glu::VarType &varType = m_shaderSpec.inputs[inputNdx].varType;
1969         const VarLayout &layout     = m_inputLayout[inputNdx];
1970 
1971         copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1972     }
1973 
1974     flushAlloc(vk, vkDevice, *m_inputAlloc);
1975 }
1976 
1977 void BufferIoExecutor::readOutputBuffer(void *const *outputPtrs, int numValues)
1978 {
1979     const VkDevice vkDevice   = m_context.getDevice();
1980     const DeviceInterface &vk = m_context.getDeviceInterface();
1981 
1982     DE_ASSERT(numValues > 0); // At least some outputs are required.
1983 
1984     invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1985 
1986     DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1987     for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1988     {
1989         const glu::VarType &varType = m_shaderSpec.outputs[outputNdx].varType;
1990         const VarLayout &layout     = m_outputLayout[outputNdx];
1991 
1992         copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1993     }
1994 }
1995 
1996 void BufferIoExecutor::initBuffers(int numValues)
1997 {
1998     const uint32_t inputStride  = getLayoutStride(m_inputLayout);
1999     const uint32_t outputStride = getLayoutStride(m_outputLayout);
2000     // Avoid creating a zero-sized input buffer/memory when there are no inputs
2001     const size_t inputBufferSize  = de::max(numValues * inputStride, 1u);
2002     const size_t outputBufferSize = numValues * outputStride;
2003 
2004     // Upload data to buffer
2005     const VkDevice vkDevice         = m_context.getDevice();
2006     const DeviceInterface &vk       = m_context.getDeviceInterface();
2007     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2008     Allocator &memAlloc             = m_context.getDefaultAllocator();
2009 
2010     const VkBufferCreateInfo inputBufferParams = {
2011         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2012         nullptr,                              // const void* pNext;
2013         0u,                                   // VkBufferCreateFlags flags;
2014         inputBufferSize,                      // VkDeviceSize size;
2015         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,   // VkBufferUsageFlags usage;
2016         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
2017         1u,                                   // uint32_t queueFamilyIndexCount;
2018         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
2019     };
2020 
2021     m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
2022     m_inputAlloc =
2023         memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
2024 
2025     VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
2026 
2027     const VkBufferCreateInfo outputBufferParams = {
2028         VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
2029         nullptr,                              // const void* pNext;
2030         0u,                                   // VkBufferCreateFlags flags;
2031         outputBufferSize,                     // VkDeviceSize size;
2032         VK_BUFFER_USAGE_STORAGE_BUFFER_BIT,   // VkBufferUsageFlags usage;
2033         VK_SHARING_MODE_EXCLUSIVE,            // VkSharingMode sharingMode;
2034         1u,                                   // uint32_t queueFamilyIndexCount;
2035         &queueFamilyIndex                     // const uint32_t* pQueueFamilyIndices;
2036     };
2037 
2038     m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
2039     m_outputAlloc =
2040         memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
2041 
2042     VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
2043 }
2044 
2045 // ComputeShaderExecutor
2046 
2047 class ComputeShaderExecutor : public BufferIoExecutor
2048 {
2049 public:
2050     ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2051     virtual ~ComputeShaderExecutor(void);
2052 
2053     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
2054 
2055     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2056                          VkDescriptorSet extraResources);
2057 
2058 protected:
2059     static std::string generateComputeShader(const ShaderSpec &spec);
2060 
2061 private:
2062     const VkDescriptorSetLayout m_extraResourcesLayout;
2063 };
2064 
2065 ComputeShaderExecutor::ComputeShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
2066                                              VkDescriptorSetLayout extraResourcesLayout)
2067     : BufferIoExecutor(context, shaderSpec)
2068     , m_extraResourcesLayout(extraResourcesLayout)
2069 {
2070 }
2071 
2072 ComputeShaderExecutor::~ComputeShaderExecutor(void)
2073 {
2074 }
2075 
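// Maps a glu data type to the SPIR-V type name used in the hand-written shaders below. When 16-bit
// packing is requested, 32-bit float types map to uint types, since the float16 values are bitcast
// out of the low bits of 32-bit integers.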
2076 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
2077 {
2078     switch (type)
2079     {
2080     case glu::TYPE_FLOAT16:
2081         return "%f16";
2082     case glu::TYPE_FLOAT16_VEC2:
2083         return "%v2f16";
2084     case glu::TYPE_FLOAT16_VEC3:
2085         return "%v3f16";
2086     case glu::TYPE_FLOAT16_VEC4:
2087         return "%v4f16";
2088     case glu::TYPE_FLOAT:
2089         return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
2090     case glu::TYPE_FLOAT_VEC2:
2091         return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
2092     case glu::TYPE_FLOAT_VEC3:
2093         return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
2094     case glu::TYPE_FLOAT_VEC4:
2095         return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
2096     case glu::TYPE_INT:
2097         return "%i32";
2098     case glu::TYPE_INT_VEC2:
2099         return "%v2i32";
2100     case glu::TYPE_INT_VEC3:
2101         return "%v3i32";
2102     case glu::TYPE_INT_VEC4:
2103         return "%v4i32";
2104     case glu::TYPE_DOUBLE:
2105         return "%f64";
2106     case glu::TYPE_DOUBLE_VEC2:
2107         return "%v2f64";
2108     case glu::TYPE_DOUBLE_VEC3:
2109         return "%v3f64";
2110     case glu::TYPE_DOUBLE_VEC4:
2111         return "%v4f64";
2112     default:
2113         DE_ASSERT(0);
2114         return "";
2115     }
2116 }
2117 
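// Emits SPIR-V that shifts the running "operation" value one bit left, so each comparison in the
// generated sequence contributes a distinct bit to the accumulated result.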
2118 std::string moveBitOperation(std::string variableName, const int operationNdx)
2119 {
2120     std::ostringstream src;
2121     src << "\n"
2122         << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
2123         << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_" << operationNdx << " %c_i32_1\n"
2124         << "OpStore " << variableName << " %move1_" << operationNdx << "\n";
2125     return src.str();
2126 }
2127 
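// Emits SPIR-V that applies the given comparison to %in0_val and %in1_val and, where it holds, adds
// the current operation bit to %out0: scalars use a conditional branch, vectors use OpSelect plus
// integer multiply/add so each component is handled independently.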
2128 std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type,
2129                              const std::string &outputType, const int scalarSize)
2130 {
2131     std::ostringstream src;
2132     std::string boolType;
2133 
2134     switch (type)
2135     {
2136     case glu::TYPE_FLOAT16:
2137     case glu::TYPE_FLOAT:
2138     case glu::TYPE_DOUBLE:
2139         src << "\n"
2140             << "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
2141             << "OpSelectionMerge %IF_" << operationNdx << " None\n"
2142             << "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_"
2143             << operationNdx << "\n"
2144             << "%label_IF_" << operationNdx << " = OpLabel\n"
2145             << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
2146             << "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
2147             << "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_"
2148             << operationNdx << "\n"
2149             << "OpStore %out0 %add_if_" << operationNdx << "\n"
2150             << "OpBranch %IF_" << operationNdx << "\n"
2151             << "%IF_" << operationNdx << " = OpLabel\n";
2152         return src.str();
2153     case glu::TYPE_FLOAT16_VEC2:
2154     case glu::TYPE_FLOAT_VEC2:
2155     case glu::TYPE_DOUBLE_VEC2:
2156         boolType = "%v2bool";
2157         break;
2158     case glu::TYPE_FLOAT16_VEC3:
2159     case glu::TYPE_FLOAT_VEC3:
2160     case glu::TYPE_DOUBLE_VEC3:
2161         boolType = "%v3bool";
2162         break;
2163     case glu::TYPE_FLOAT16_VEC4:
2164     case glu::TYPE_FLOAT_VEC4:
2165     case glu::TYPE_DOUBLE_VEC4:
2166         boolType = "%v4bool";
2167         break;
2168     default:
2169         DE_ASSERT(0);
2170         return "";
2171     }
2172 
2173     src << "\n"
2174         << "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
2175         << "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx
2176         << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
2177         << "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";
2178 
2179     src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
2180     for (int ndx = 0; ndx < scalarSize; ++ndx)
2181         src << " %operation_val_" << operationNdx;
2182     src << "\n";
2183 
2184     src << "%toAdd" << operationNdx << " = OpIMul " << outputType << " %ivec_result_" << operationNdx
2185         << " %operation_vec_" << operationNdx << "\n"
2186         << "%out_val_" << operationNdx << " = OpLoad " << outputType << " %out0\n"
2187 
2188         << "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd"
2189         << operationNdx << "\n"
2190         << "OpStore %out0 %add_if_" << operationNdx << "\n";
2191 
2192     return src.str();
2193 }
2194 
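// Builds the hand-written SPIR-V compute shader used for the special-case tests (floating-point
// comparisons, OpFRem, and the modf/frexp struct variants), selecting capabilities and types from
// the shader spec.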
2195 std::string generateSpirv(const ShaderSpec &spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2196 {
2197     static const std::string COMPARE_OPERATIONS[] = {"OpFOrdEqual",
2198                                                      "OpFOrdGreaterThan",
2199                                                      "OpFOrdLessThan",
2200                                                      "OpFOrdGreaterThanEqual",
2201                                                      "OpFOrdLessThanEqual",
2202                                                      "OpFUnordEqual",
2203                                                      "OpFUnordGreaterThan",
2204                                                      "OpFUnordLessThan",
2205                                                      "OpFUnordGreaterThanEqual",
2206                                                      "OpFUnordLessThanEqual"};
2207 
2208     int moveBitNdx = 0;
2209     vector<std::string> inputTypes;
2210     vector<std::string> outputTypes;
2211     const std::string packType =
2212         spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2213 
2214     vector<bool> floatResult;
2215     for (const auto &symbol : spec.outputs)
2216         floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2217 
2218     const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2219 
2220     vector<bool> packFloatRes;
2221     for (const auto &floatRes : floatResult)
2222         packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2223 
2224     const bool useF32Types = (!are16Bit && !are64Bit);
2225     const bool useF64Types = are64Bit;
2226     const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2227 
2228     for (const auto &symbol : spec.inputs)
2229         inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2230 
2231     for (const auto &symbol : spec.outputs)
2232         outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2233 
2234     DE_ASSERT(!inputTypes.empty());
2235     DE_ASSERT(!outputTypes.empty());
2236 
2237     // Assert input and output types match the expected operations.
2238     switch (spec.spirvCase)
2239     {
2240     case SPIRV_CASETYPE_COMPARE:
2241     case SPIRV_CASETYPE_FREM:
2242         DE_ASSERT(inputTypes.size() == 2);
2243         DE_ASSERT(outputTypes.size() == 1);
2244         break;
2245     case SPIRV_CASETYPE_MODFSTRUCT:
2246     case SPIRV_CASETYPE_FREXPSTRUCT:
2247         DE_ASSERT(inputTypes.size() == 1);
2248         DE_ASSERT(outputTypes.size() == 2);
2249         break;
2250     default:
2251         DE_ASSERT(false);
2252         break;
2253     }
2254 
2255     std::ostringstream src;
2256     src << "; SPIR-V\n"
2257            "; Version: 1.0\n"
2258            "; Generator: Khronos Glslang Reference Front End; 4\n"
2259            "; Bound: 114\n"
2260            "; Schema: 0\n"
2261            "OpCapability Shader\n";
2262 
2263     if (useF16Types)
2264         src << "OpCapability Float16\n";
2265 
2266     if (are16Bit)
2267         src << "OpCapability StorageBuffer16BitAccess\n"
2268                "OpCapability UniformAndStorageBuffer16BitAccess\n";
2269 
2270     if (useF64Types)
2271         src << "OpCapability Float64\n";
2272 
2273     if (are16Bit)
2274         src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2275 
2276     src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2277            "OpMemoryModel Logical GLSL450\n"
2278            "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2279            "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2280            "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2281            "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2282 
2283     // Input offsets and stride.
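    // The layout roughly follows std430 rules: each member is aligned to its (padded) size, vec3 is padded to four
    // components, and the array stride is the total size rounded up to the size of the largest member.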
2284     {
2285         int offset  = 0;
2286         int ndx     = 0;
2287         int largest = 0;
2288         for (const auto &symbol : spec.inputs)
2289         {
2290             const int scalarSize = symbol.varType.getScalarSize();
2291             const int memberSize =
2292                 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2293                 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2294                      (int)sizeof(uint64_t) :
2295                      (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2296                                                                               (int)sizeof(uint32_t)));
2297             const int extraMemberBytes = (offset % memberSize);
2298 
2299             offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2300             src << "OpMemberDecorate %SSB0_IN " << ndx << " Offset " << offset << "\n";
2301             ++ndx;
2302 
2303             if (memberSize > largest)
2304                 largest = memberSize;
2305 
2306             offset += memberSize;
2307         }
2308         DE_ASSERT(largest > 0);
2309         const int extraBytes = (offset % largest);
2310         const int stride     = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2311         src << "OpDecorate %up_SSB0_IN ArrayStride " << stride << "\n";
2312     }
2313 
2314     src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2315            "OpDecorate %ssboIN BufferBlock\n"
2316            "OpDecorate %ssbo_src DescriptorSet 0\n"
2317            "OpDecorate %ssbo_src Binding 0\n"
2318            "\n";
2319 
2320     if (isMediump)
2321     {
2322         for (size_t i = 0; i < inputTypes.size(); ++i)
2323         {
2324             src << "OpMemberDecorate %SSB0_IN " << i
2325                 << " RelaxedPrecision\n"
2326                    "OpDecorate %in"
2327                 << i
2328                 << " RelaxedPrecision\n"
2329                    "OpDecorate %src_val_0_"
2330                 << i
2331                 << " RelaxedPrecision\n"
2332                    "OpDecorate %in"
2333                 << i << "_val RelaxedPrecision\n";
2334         }
2335 
2336         if (anyFloatResult)
2337         {
2338             switch (spec.spirvCase)
2339             {
2340             case SPIRV_CASETYPE_FREM:
2341                 src << "OpDecorate %frem_result RelaxedPrecision\n";
2342                 break;
2343             case SPIRV_CASETYPE_MODFSTRUCT:
2344                 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2345                 break;
2346             case SPIRV_CASETYPE_FREXPSTRUCT:
2347                 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2348                 break;
2349             default:
2350                 DE_ASSERT(false);
2351                 break;
2352             }
2353 
2354             for (size_t i = 0; i < outputTypes.size(); ++i)
2355             {
2356                 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2357                 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2358                 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2359             }
2360         }
2361     }
2362 
2363     // Output offsets and stride.
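    // The output members use the same layout rules as the input struct above.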
2364     {
2365         int offset  = 0;
2366         int ndx     = 0;
2367         int largest = 0;
2368         for (const auto &symbol : spec.outputs)
2369         {
2370             const int scalarSize = symbol.varType.getScalarSize();
2371             const int memberSize =
2372                 (scalarSize + ((scalarSize == 3) ? 1 : 0)) *
2373                 (isDataTypeDoubleType(symbol.varType.getBasicType()) ?
2374                      (int)sizeof(uint64_t) :
2375                      (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(uint16_t) :
2376                                                                               (int)sizeof(uint32_t)));
2377             const int extraMemberBytes = (offset % memberSize);
2378 
2379             offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2380             src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2381             ++ndx;
2382 
2383             if (memberSize > largest)
2384                 largest = memberSize;
2385 
2386             offset += memberSize;
2387         }
2388         DE_ASSERT(largest > 0);
2389         const int extraBytes = (offset % largest);
2390         const int stride     = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2391         src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2392     }
2393 
2394     src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2395            "OpDecorate %ssboOUT BufferBlock\n"
2396            "OpDecorate %ssbo_dst DescriptorSet 0\n"
2397            "OpDecorate %ssbo_dst Binding 1\n"
2398            "\n"
2399            "%void  = OpTypeVoid\n"
2400            "%bool  = OpTypeBool\n"
2401            "%v2bool = OpTypeVector %bool 2\n"
2402            "%v3bool = OpTypeVector %bool 3\n"
2403            "%v4bool = OpTypeVector %bool 4\n"
2404            "%u32   = OpTypeInt 32 0\n";
2405 
2406     if (useF32Types)
2407         src << "%f32   = OpTypeFloat 32\n"
2408                "%v2f32 = OpTypeVector %f32 2\n"
2409                "%v3f32 = OpTypeVector %f32 3\n"
2410                "%v4f32 = OpTypeVector %f32 4\n";
2411 
2412     if (useF64Types)
2413         src << "%f64   = OpTypeFloat 64\n"
2414                "%v2f64 = OpTypeVector %f64 2\n"
2415                "%v3f64 = OpTypeVector %f64 3\n"
2416                "%v4f64 = OpTypeVector %f64 4\n";
2417 
2418     if (useF16Types)
2419         src << "%f16   = OpTypeFloat 16\n"
2420                "%v2f16 = OpTypeVector %f16 2\n"
2421                "%v3f16 = OpTypeVector %f16 3\n"
2422                "%v4f16 = OpTypeVector %f16 4\n";
2423 
2424     src << "%i32   = OpTypeInt 32 1\n"
2425            "%v2i32 = OpTypeVector %i32 2\n"
2426            "%v3i32 = OpTypeVector %i32 3\n"
2427            "%v4i32 = OpTypeVector %i32 4\n"
2428            "%v2u32 = OpTypeVector %u32 2\n"
2429            "%v3u32 = OpTypeVector %u32 3\n"
2430            "%v4u32 = OpTypeVector %u32 4\n"
2431            "\n"
2432            "%ip_u32   = OpTypePointer Input %u32\n"
2433            "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2434            "%up_float = OpTypePointer Uniform "
2435         << inputTypes[0]
2436         << "\n"
2437            "\n"
2438            "%fp_operation = OpTypePointer Function %i32\n"
2439            "%voidf        = OpTypeFunction %void\n"
2440            "%fp_u32       = OpTypePointer Function %u32\n"
2441            "%fp_it1       = OpTypePointer Function "
2442         << inputTypes[0] << "\n";
2443 
2444     for (size_t i = 0; i < outputTypes.size(); ++i)
2445     {
2446         src << "%fp_out_" << i << "     = OpTypePointer Function " << outputTypes[i] << "\n"
2447             << "%up_out_" << i << "     = OpTypePointer Uniform " << outputTypes[i] << "\n";
2448     }
2449 
2450     if (spec.packFloat16Bit)
2451         src << "%fp_f16  = OpTypePointer Function " << packType << "\n";
2452 
2453     src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2454            "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2455            "\n"
2456            "%c_u32_0 = OpConstant %u32 0\n"
2457            "%c_u32_1 = OpConstant %u32 1\n"
2458            "%c_u32_2 = OpConstant %u32 2\n"
2459            "%c_i32_0 = OpConstant %i32 0\n"
2460            "%c_i32_1 = OpConstant %i32 1\n"
2461            "\n";
2462 
2463     if (useF32Types)
2464         src << "%c_f32_0 = OpConstant %f32 0\n"
2465                "%c_f32_1 = OpConstant %f32 1\n";
2466 
2467     if (useF16Types)
2468         src << "%c_f16_0 = OpConstant %f16 0\n"
2469                "%c_f16_1 = OpConstant %f16 1\n"
2470                "%c_f16_minus1 = OpConstant %f16 -0x1p+0\n";
2471 
2472     if (useF64Types)
2473         src << "%c_f64_0 = OpConstant %f64 0\n"
2474                "%c_f64_1 = OpConstant %f64 1\n";
2475 
2476     src << "\n"
2477            "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2478            "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2479            "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2480            "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2481            "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2482            "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2483            "\n";
2484 
2485     if (useF32Types)
2486         src << "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2487                "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2488                "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2489                "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2490                "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2491                "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n";
2492 
2493     if (useF16Types)
2494         src << "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2495                "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2496                "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2497                "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2498                "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2499                "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n";
2500 
2501     if (useF64Types)
2502         src << "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2503                "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2504                "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2505                "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2506                "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2507                "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2508                "\n";
2509 
2510     // Input struct.
2511     {
2512         src << "%SSB0_IN    = OpTypeStruct";
2513         for (const auto &t : inputTypes)
2514             src << " " << t;
2515         src << "\n";
2516     }
2517 
2518     src << "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2519            "%ssboIN     = OpTypeStruct %up_SSB0_IN\n"
2520            "%up_ssboIN  = OpTypePointer Uniform %ssboIN\n"
2521            "%ssbo_src   = OpVariable %up_ssboIN Uniform\n"
2522            "\n";
2523 
2524     // Output struct.
2525     {
2526         src << "%SSB0_OUT    = OpTypeStruct";
2527         for (const auto &t : outputTypes)
2528             src << " " << t;
2529         src << "\n";
2530     }
2531 
2532     std::string modfStructMemberType;
2533     std::string frexpStructFirstMemberType;
2534     if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2535     {
2536         modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2537         src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2538     }
2539     else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2540     {
2541         frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2542         src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2543     }
2544 
2545     src << "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2546            "%ssboOUT     = OpTypeStruct %up_SSB0_OUT\n"
2547            "%up_ssboOUT  = OpTypePointer Uniform %ssboOUT\n"
2548            "%ssbo_dst    = OpVariable %up_ssboOUT Uniform\n"
2549            "\n"
2550            "%BP_main = OpFunction %void None %voidf\n"
2551            "%BP_label = OpLabel\n"
2552            "%invocationNdx = OpVariable %fp_u32 Function\n";
2553 
2554     // Note: here we assume all inputs have the same type.
2555     for (size_t i = 0; i < inputTypes.size(); ++i)
2556         src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2557 
2558     for (size_t i = 0; i < outputTypes.size(); ++i)
2559         src << "%out" << i << " = OpVariable "
2560             << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2561 
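    // Flatten the 3D workgroup ID into a linear invocation index:
    // invocationNdx = NumWorkgroups.x * NumWorkgroups.y * WorkgroupId.z + NumWorkgroups.x * WorkgroupId.y + WorkgroupId.x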
2562     src << "%operation = OpVariable %fp_operation Function\n"
2563            "%BP_id_0_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2564            "%BP_id_1_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2565            "%BP_id_2_ptr  = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2566            "%BP_num_0_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2567            "%BP_num_1_ptr  = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2568            "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2569            "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2570            "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2571            "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2572            "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2573            "\n"
2574            "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2575            "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2576            "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2577            "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2578            "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2579            "OpStore %invocationNdx %add_2\n"
2580            "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2581 
2582     // Load input values.
2583     for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2584     {
2585         src << "\n"
2586             << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_"
2587             << inputNdx << "\n"
2588             << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2589 
2590         if (spec.packFloat16Bit)
2591         {
2592             if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2593             {
2594                 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2595                 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2596                 {
2597                     src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx
2598                         << " " << i
2599                         << "\n"
2600                            "%val_v2f16_0_"
2601                         << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i
2602                         << "\n"
2603                            "%val_f16_0_"
2604                         << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i
2605                         << " 0\n";
2606                 }
2607 
2608                 // Construct the input vector.
2609                 src << "%val_f16_0_" << inputNdx << "   = OpCompositeConstruct " << packType;
2610                 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2611                 {
2612                     src << " %val_f16_0_" << inputNdx << "_" << i;
2613                 }
2614 
2615                 src << "\n";
2616                 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2617             }
2618             else
2619             {
2620                 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx
2621                     << "\n"
2622                        "%val_f16_0_"
2623                     << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2624 
2625                 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2626             }
2627         }
2628         else
2629             src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2630 
2631         src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx])
2632             << " %in" << inputNdx << "\n";
2633     }
2634 
2635     src << "\n"
2636            "OpStore %operation %c_i32_1\n";
2637 
2638     // Fill output values with placeholder data.
2639     for (size_t i = 0; i < outputTypes.size(); ++i)
2640         src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2641 
2642     src << "\n";
2643 
2644     // Run operation.
2645     switch (spec.spirvCase)
2646     {
2647     case SPIRV_CASETYPE_COMPARE:
2648         for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2649         {
2650             src << scalarComparison(COMPARE_OPERATIONS[operationNdx], operationNdx,
2651                                     spec.inputs[0].varType.getBasicType(), outputTypes[0],
2652                                     spec.outputs[0].varType.getScalarSize());
2653             src << moveBitOperation("%operation", moveBitNdx);
2654             ++moveBitNdx;
2655         }
2656         break;
2657     case SPIRV_CASETYPE_FREM:
2658         src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2659             << "OpStore %out0 %frem_result\n";
2660         break;
2661     case SPIRV_CASETYPE_MODFSTRUCT:
2662         src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2663             << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2664             << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2665             << "OpStore %out0 %modfstruct_result_0\n"
2666             << "OpStore %out1 %modfstruct_result_1\n";
2667         break;
2668     case SPIRV_CASETYPE_FREXPSTRUCT:
2669         src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2670             << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2671             << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2672             << "OpStore %out0 %frexpstruct_result_0\n"
2673             << "OpStore %out1 %frexpstruct_result_1\n";
2674         break;
2675     default:
2676         DE_ASSERT(false);
2677         break;
2678     }
2679 
2680     for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2681     {
2682         src << "\n"
2683                "%out_val_final_"
2684             << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out"
2685             << outputNdx
2686             << "\n"
2687                "%ssbo_dst_ptr_"
2688             << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_"
2689             << outputNdx << "\n";
2690 
2691         if (packFloatRes[outputNdx])
2692         {
2693             if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2694             {
2695                 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2696                 {
2697                     src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_"
2698                         << outputNdx << " " << i << "\n";
2699                     src << "%out_composite_" << outputNdx << "_" << i
2700                         << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i
2701                         << " %c_f16_minus1\n";
2702                     src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx
2703                         << "_" << i << "\n";
2704                 }
2705 
2706                 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2707                 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2708                     src << " %u32_val_" << outputNdx << "_" << i;
2709                 src << "\n";
2710                 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2711             }
2712             else
2713             {
2714                 src << "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx
2715                     << " %c_f16_minus1\n"
2716                        "%out_result_"
2717                     << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx
2718                     << "\n"
2719                        "OpStore %ssbo_dst_ptr_"
2720                     << outputNdx << " %out_result_" << outputNdx << "\n";
2721             }
2722         }
2723         else
2724         {
2725             src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2726         }
2727     }
2728 
2729     src << "\n"
2730            "OpReturn\n"
2731            "OpFunctionEnd\n";
2732 
2733     return src.str();
2734 }
2735 
2736 std::string ComputeShaderExecutor::generateComputeShader(const ShaderSpec &spec)
2737 {
2738     if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2739     {
2740         bool are16Bit  = false;
2741         bool are64Bit  = false;
2742         bool isMediump = false;
2743         for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2744         {
2745             if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2746                 are16Bit = true;
2747 
2748             if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2749                 are64Bit = true;
2750 
2751             if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2752                 isMediump = true;
2753 
2754             if (isMediump && are16Bit)
2755                 break;
2756         }
2757 
2758         return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2759     }
2760     else
2761     {
2762         std::ostringstream src;
2763         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2764 
2765         if (!spec.globalDeclarations.empty())
2766             src << spec.globalDeclarations << "\n";
2767 
2768         src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2769             << "\n";
2770 
2771         declareBufferBlocks(src, spec);
2772 
2773         src << "void main (void)\n"
2774             << "{\n"
2775             << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2776             << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2777 
2778         generateExecBufferIo(src, spec, "invocationNdx");
2779 
2780         src << "}\n";
2781 
2782         return src.str();
2783     }
2784 }
2785 
2786 void ComputeShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
2787 {
2788     if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2789         programCollection.spirvAsmSources.add("compute")
2790             << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3)
2791             << generateComputeShader(shaderSpec);
2792     else
2793         programCollection.glslSources.add("compute")
2794             << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2795 }
2796 
2797 void ComputeShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
2798                                     VkDescriptorSet extraResources)
2799 {
2800     const VkDevice vkDevice         = m_context.getDevice();
2801     const DeviceInterface &vk       = m_context.getDeviceInterface();
2802     const VkQueue queue             = m_context.getUniversalQueue();
2803     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
2804 
2805     DescriptorPoolBuilder descriptorPoolBuilder;
2806     DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
2807 
2808     Move<VkShaderModule> computeShaderModule;
2809     Move<VkPipeline> computePipeline;
2810     Move<VkPipelineLayout> pipelineLayout;
2811     Move<VkCommandPool> cmdPool;
2812     Move<VkDescriptorPool> descriptorPool;
2813     Move<VkDescriptorSetLayout> descriptorSetLayout;
2814     Move<VkDescriptorSet> descriptorSet;
2815     const uint32_t numDescriptorSets = (m_extraResourcesLayout != VK_NULL_HANDLE) ? 2u : 1u;
2816 
2817     DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));
2818 
2819     initBuffers(numValues);
2820 
2821     // Setup input buffer & copy data
2822     // For SPIR-V shaders using packed 16-bit float values as input, each float is converted to a 16-bit half
2823     // before being stored in the lower 16 bits of a 32-bit integer in the input buffer, and is cast back to a
2824     // 16-bit float in the shader.
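    // For example, an input value of 1.5f is stored as the half-precision bit pattern 0x3E00 in bits [15:0] of the
    // 32-bit word; the shader bitcasts the word to v2f16 and extracts component 0 to recover the 16-bit value.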
2825     uploadInputBuffer(inputs, numValues,
2826                       m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
2827 
2828     // Create command pool
2829     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
2830 
2831     // Set up descriptor set layout, pool and set
2832 
2833     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2834     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2835     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
2836     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
2837 
2838     descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
2839     descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
2840 
2841     const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
2842                                                    *descriptorPool, 1u, &*descriptorSetLayout};
2843 
2844     descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
2845 
2846     // Create pipeline layout
2847     {
2848         const VkDescriptorSetLayout descriptorSetLayouts[]    = {*descriptorSetLayout, m_extraResourcesLayout};
2849         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
2850             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
2851             nullptr,                                       // const void* pNext;
2852             (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
2853             numDescriptorSets,                             // uint32_t setLayoutCount;
2854             descriptorSetLayouts,                          // const VkDescriptorSetLayout* pSetLayouts;
2855             0u,                                            // uint32_t pushConstantRangeCount;
2856             nullptr                                        // const VkPushConstantRange* pPushConstantRanges;
2857         };
2858 
2859         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
2860     }
2861 
2862     // Create shaders
2863     {
2864         computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
2865     }
2866 
2867     // Create pipeline
2868     {
2869         const VkPipelineShaderStageCreateInfo shaderStageParams[1] = {{
2870             VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
2871             nullptr,                                             // const void* pNext;
2872             (VkPipelineShaderStageCreateFlags)0u,                // VkPipelineShaderStageCreateFlags flags;
2873             VK_SHADER_STAGE_COMPUTE_BIT,                         // VkShaderStageFlagBits stage;
2874             *computeShaderModule,                                // VkShaderModule module;
2875             "main",                                              // const char* pName;
2876             nullptr                                              // const VkSpecializationInfo* pSpecializationInfo;
2877         }};
2878 
2879         const VkComputePipelineCreateInfo computePipelineParams = {
2880             VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
2881             nullptr,                                        // const void* pNext;
2882             (VkPipelineCreateFlags)0,                       // VkPipelineCreateFlags flags;
2883             *shaderStageParams,                             // VkPipelineShaderStageCreateInfo stage;
2884             *pipelineLayout,                                // VkPipelineLayout layout;
2885             VK_NULL_HANDLE,                                 // VkPipeline basePipelineHandle;
2886             0u,                                             // int32_t basePipelineIndex;
2887         };
2888 
2889         computePipeline = createComputePipeline(vk, vkDevice, VK_NULL_HANDLE, &computePipelineParams);
2890     }
2891 
2892     const int maxValuesPerInvocation = m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
2893     int curOffset                    = 0;
2894     const uint32_t inputStride       = getInputStride();
2895     const uint32_t outputStride      = getOutputStride();
2896 
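    // Process the values in batches: each iteration binds the sub-range of the input/output buffers belonging to
    // the batch and dispatches one workgroup per value.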
2897     while (curOffset < numValues)
2898     {
2899         Move<VkCommandBuffer> cmdBuffer;
2900         const int numToExec = de::min(maxValuesPerInvocation, numValues - curOffset);
2901 
2902         // Update descriptors
2903         {
2904             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
2905 
2906             const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
2907                 *m_outputBuffer,          // VkBuffer buffer;
2908                 curOffset * outputStride, // VkDeviceSize offset;
2909                 numToExec * outputStride  // VkDeviceSize range;
2910             };
2911 
2912             descriptorSetUpdateBuilder.writeSingle(
2913                 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
2914                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
2915 
2916             if (inputStride)
2917             {
2918                 const VkDescriptorBufferInfo inputDescriptorBufferInfo = {
2919                     *m_inputBuffer,          // VkBuffer buffer;
2920                     curOffset * inputStride, // VkDeviceSize offset;
2921                     numToExec * inputStride  // VkDeviceSize range;
2922                 };
2923 
2924                 descriptorSetUpdateBuilder.writeSingle(
2925                     *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
2926                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
2927             }
2928 
2929             descriptorSetUpdateBuilder.update(vk, vkDevice);
2930         }
2931 
2932         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
2933         beginCommandBuffer(vk, *cmdBuffer);
2934         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
2935 
2936         {
2937             const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
2938             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets,
2939                                      descriptorSets, 0u, nullptr);
2940         }
2941 
2942         vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);
2943 
2944         // Insert a barrier so data written by the shader is available to the host
2945         {
2946             const VkBufferMemoryBarrier bufferBarrier = {
2947                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
2948                 nullptr,                                 // const void*        pNext;
2949                 VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags      srcAccessMask;
2950                 VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
2951                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
2952                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
2953                 *m_outputBuffer,                         // VkBuffer           buffer;
2954                 0,                                       // VkDeviceSize       offset;
2955                 VK_WHOLE_SIZE,                           // VkDeviceSize       size;
2956             };
2957 
2958             vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT,
2959                                   (VkDependencyFlags)0, 0, nullptr, 1, &bufferBarrier, 0, nullptr);
2960         }
2961 
2962         endCommandBuffer(vk, *cmdBuffer);
2963 
2964         curOffset += numToExec;
2965 
2966         // Execute
2967         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
2968     }
2969 
2970     // Read back data
2971     readOutputBuffer(outputs, numValues);
2972 }
2973 
2974 #ifndef CTS_USES_VULKANSC
2975 // MeshTaskShaderExecutor
2976 
2977 class MeshTaskShaderExecutor : public BufferIoExecutor
2978 {
2979 public:
2980     MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
2981     virtual ~MeshTaskShaderExecutor(void);
2982 
2983     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection, bool useTask);
2984 
2985     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
2986                          VkDescriptorSet extraResources);
2987 
2988 protected:
2989     static std::string generateMeshShader(const ShaderSpec &spec, bool useTask);
2990     static std::string generateTaskShader(const ShaderSpec &spec);
2991 
2992 private:
2993     const VkDescriptorSetLayout m_extraResourcesLayout;
2994 };
2995 
2996 MeshTaskShaderExecutor::MeshTaskShaderExecutor(Context &context, const ShaderSpec &shaderSpec,
2997                                                VkDescriptorSetLayout extraResourcesLayout)
2998     : BufferIoExecutor(context, shaderSpec)
2999     , m_extraResourcesLayout(extraResourcesLayout)
3000 {
3001 }
3002 
3003 MeshTaskShaderExecutor::~MeshTaskShaderExecutor(void)
3004 {
3005 }
3006 
3007 std::string MeshTaskShaderExecutor::generateMeshShader(const ShaderSpec &spec, bool useTask)
3008 {
3009     DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
3010 
3011     std::ostringstream src;
3012 
3013     if (useTask)
3014     {
3015         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3016             << "#extension GL_EXT_mesh_shader : enable\n"
3017             << "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
3018             << "layout(points) out;\n"
3019             << "layout(max_vertices=1, max_primitives=1) out;\n"
3020             << "\n"
3021             << "void main (void)\n"
3022             << "{\n"
3023             << "    SetMeshOutputsEXT(0u, 0u);\n"
3024             << "}\n";
3025     }
3026     else
3027     {
3028         src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3029             << "#extension GL_EXT_mesh_shader : enable\n";
3030 
3031         if (!spec.globalDeclarations.empty())
3032             src << spec.globalDeclarations << "\n";
3033 
3034         src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3035             << "layout(points) out;\n"
3036             << "layout(max_vertices=1, max_primitives=1) out;\n"
3037             << "\n";
3038 
3039         declareBufferBlocks(src, spec);
3040 
3041         src << "void main (void)\n"
3042             << "{\n"
3043             << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3044             << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3045 
3046         generateExecBufferIo(src, spec, "invocationNdx");
3047 
3048         src << "    SetMeshOutputsEXT(0u, 0u);\n"
3049             << "}\n";
3050     }
3051 
3052     return src.str();
3053 }
3054 
3055 std::string MeshTaskShaderExecutor::generateTaskShader(const ShaderSpec &spec)
3056 {
3057     std::ostringstream src;
3058 
3059     src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
3060         << "#extension GL_EXT_mesh_shader : enable\n";
3061 
3062     if (!spec.globalDeclarations.empty())
3063         src << spec.globalDeclarations << "\n";
3064 
3065     src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
3066         << "\n";
3067 
3068     declareBufferBlocks(src, spec);
3069 
3070     src << "void main (void)\n"
3071         << "{\n"
3072         << "    uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
3073         << "                       + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
3074 
3075     generateExecBufferIo(src, spec, "invocationNdx");
3076 
3077     src << "    EmitMeshTasksEXT(0u, 0u, 0u);\n"
3078         << "}\n";
3079 
3080     return src.str();
3081 }
3082 
3083 void MeshTaskShaderExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection,
3084                                              bool useTask)
3085 {
3086     DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
3087     programCollection.glslSources.add("mesh")
3088         << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
3089     if (useTask)
3090         programCollection.glslSources.add("task")
3091             << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
3092 }
3093 
3094 void MeshTaskShaderExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3095                                      VkDescriptorSet extraResources)
3096 {
3097     const auto vkDevice         = m_context.getDevice();
3098     const auto &vk              = m_context.getDeviceInterface();
3099     const auto queue            = m_context.getUniversalQueue();
3100     const auto queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3101     const auto bindPoint        = VK_PIPELINE_BIND_POINT_GRAPHICS;
3102     const auto &binaries        = m_context.getBinaryCollection();
3103     const bool useTask          = binaries.contains("task");
3104     const auto shaderStage      = (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
3105     const auto pipelineStage =
3106         (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);
3107 
3108     DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));
3109 
3110     // Create input and output buffers.
3111     initBuffers(numValues);
3112 
3113     // Setup input buffer & copy data
3114     // For SPIR-V shaders using packed 16-bit float values as input, each float is converted to a 16-bit half
3115     // before being stored in the lower 16 bits of a 32-bit integer in the input buffer, and is cast back to a
3116     // 16-bit float in the shader.
3117     uploadInputBuffer(inputs, numValues,
3118                       m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));
3119 
3120     // Create command pool
3121     const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3122 
3123     // Descriptor pool, set layout and set.
3124     DescriptorPoolBuilder descriptorPoolBuilder;
3125     DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3126 
3127     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3128     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3129     descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
3130     descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3131 
3132     const auto descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3133     const auto descriptorPool =
3134         descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3135     const auto descriptorSet = makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());
3136 
3137     // Create pipeline layout
3138     std::vector<VkDescriptorSetLayout> setLayouts;
3139     setLayouts.push_back(descriptorSetLayout.get());
3140     if (m_extraResourcesLayout != VK_NULL_HANDLE)
3141         setLayouts.push_back(m_extraResourcesLayout);
3142 
3143     const auto pipelineLayout =
3144         makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));
3145 
3146     // Create shaders
3147     const auto meshShaderModule = createShaderModule(vk, vkDevice, binaries.get("mesh"));
3148     const auto taskShaderModule =
3149         (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());
3150 
3151     // Render pass and framebuffer.
3152     const auto fbExtent   = makeExtent2D(1u, 1u);
3153     const auto renderPass = makeRenderPass(vk, vkDevice);
3154     const auto framebuffer =
3155         makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);
3156 
3157     const std::vector<VkViewport> viewports(1u, makeViewport(fbExtent));
3158     const std::vector<VkRect2D> scissors(1u, makeRect2D(fbExtent));
3159 
3160     // Create pipeline.
3161     const auto meshPipeline =
3162         makeGraphicsPipeline(vk, vkDevice, pipelineLayout.get(), taskShaderModule.get(), meshShaderModule.get(),
3163                              VK_NULL_HANDLE, renderPass.get(), viewports, scissors);
3164 
3165     const int maxValuesPerInvocation = m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
3166     const uint32_t inputStride       = getInputStride();
3167     const uint32_t outputStride      = getOutputStride();
3168     const auto outputBufferBinding =
3169         DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
3170     const auto inputBufferBinding =
3171         DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
3172     int curOffset = 0;
3173 
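    // As in the compute path, values are processed in batches; each batch issues one mesh task draw with one
    // workgroup per value.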
3174     while (curOffset < numValues)
3175     {
3176         const auto remaining = numValues - curOffset;
3177         const auto numToExec = de::min(maxValuesPerInvocation, remaining);
3178 
3179         // Update descriptors
3180         {
3181             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3182 
3183             const auto outputDescriptorBufferInfo =
3184                 makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
3185             descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding,
3186                                                    VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3187 
3188             if (inputStride)
3189             {
3190                 const auto inputDescriptorBufferInfo =
3191                     makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
3192                 descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding,
3193                                                        VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3194             }
3195 
3196             descriptorSetUpdateBuilder.update(vk, vkDevice);
3197         }
3198 
3199         std::vector<VkDescriptorSet> descriptorSets;
3200         descriptorSets.push_back(descriptorSet.get());
3201         if (extraResources != VK_NULL_HANDLE)
3202             descriptorSets.push_back(extraResources);
3203 
3204         const auto bufferBarrier = makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
3205                                                            m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
3206         const auto cmdBufferPtr  = allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3207         const auto cmdBuffer     = cmdBufferPtr.get();
3208 
3209         // Record command buffer, including pipeline barrier from output buffer to the host.
3210         beginCommandBuffer(vk, cmdBuffer);
3211         beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
3212         vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
3213         vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u,
3214                                  static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u,
3215                                  nullptr);
3216         vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
3217         endRenderPass(vk, cmdBuffer);
3218         cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
3219         endCommandBuffer(vk, cmdBuffer);
3220 
3221         // Execute
3222         submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);
3223 
3224         curOffset += numToExec;
3225     }
3226 
3227     // Read back data
3228     readOutputBuffer(outputs, numValues);
3229 }
3230 #endif // CTS_USES_VULKANSC
3231 
3232 // Tessellation utils
3233 
3234 static std::string generateVertexShaderForTess(void)
3235 {
3236     std::ostringstream src;
3237     src << "#version 450\n"
3238         << "void main (void)\n{\n"
3239         << "    gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n"
3240         << "}\n";
3241 
3242     return src.str();
3243 }
3244 
3245 class TessellationExecutor : public BufferIoExecutor
3246 {
3247 public:
3248     TessellationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3249     virtual ~TessellationExecutor(void);
3250 
3251     void renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3252                     VkDescriptorSet extraResources);
3253 
3254 private:
3255     const VkDescriptorSetLayout m_extraResourcesLayout;
3256 };
3257 
3258 TessellationExecutor::TessellationExecutor(Context &context, const ShaderSpec &shaderSpec,
3259                                            VkDescriptorSetLayout extraResourcesLayout)
3260     : BufferIoExecutor(context, shaderSpec)
3261     , m_extraResourcesLayout(extraResourcesLayout)
3262 {
3263     const VkPhysicalDeviceFeatures &features = context.getDeviceFeatures();
3264 
3265     if (!features.tessellationShader)
3266         TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
3267 }
3268 
3269 TessellationExecutor::~TessellationExecutor(void)
3270 {
3271 }
3272 
3273 void TessellationExecutor::renderTess(uint32_t numValues, uint32_t vertexCount, uint32_t patchControlPoints,
3274                                       VkDescriptorSet extraResources)
3275 {
3276     const size_t inputBufferSize    = numValues * getInputStride();
3277     const VkDevice vkDevice         = m_context.getDevice();
3278     const DeviceInterface &vk       = m_context.getDeviceInterface();
3279     const VkQueue queue             = m_context.getUniversalQueue();
3280     const uint32_t queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
3281     Allocator &memAlloc             = m_context.getDefaultAllocator();
3282 
3283     const tcu::UVec2 renderSize(DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);
3284 
3285     Move<VkImage> colorImage;
3286     de::MovePtr<Allocation> colorImageAlloc;
3287     VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
3288     Move<VkImageView> colorImageView;
3289 
3290     Move<VkRenderPass> renderPass;
3291     Move<VkFramebuffer> framebuffer;
3292     Move<VkPipelineLayout> pipelineLayout;
3293     Move<VkPipeline> graphicsPipeline;
3294 
3295     Move<VkShaderModule> vertexShaderModule;
3296     Move<VkShaderModule> tessControlShaderModule;
3297     Move<VkShaderModule> tessEvalShaderModule;
3298     Move<VkShaderModule> fragmentShaderModule;
3299 
3300     Move<VkCommandPool> cmdPool;
3301     Move<VkCommandBuffer> cmdBuffer;
3302 
3303     Move<VkDescriptorPool> descriptorPool;
3304     Move<VkDescriptorSetLayout> descriptorSetLayout;
3305     Move<VkDescriptorSet> descriptorSet;
3306     const uint32_t numDescriptorSets = (m_extraResourcesLayout != VK_NULL_HANDLE) ? 2u : 1u;
3307 
3308     DE_ASSERT((m_extraResourcesLayout != VK_NULL_HANDLE) == (extraResources != VK_NULL_HANDLE));
3309 
3310     // Create color image
3311     {
3312         const VkImageCreateInfo colorImageParams = {
3313             VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,                                   // VkStructureType sType;
3314             nullptr,                                                               // const void* pNext;
3315             0u,                                                                    // VkImageCreateFlags flags;
3316             VK_IMAGE_TYPE_2D,                                                      // VkImageType imageType;
3317             colorFormat,                                                           // VkFormat format;
3318             {renderSize.x(), renderSize.y(), 1u},                                  // VkExtent3D extent;
3319             1u,                                                                    // uint32_t mipLevels;
3320             1u,                                                                    // uint32_t arrayLayers;
3321             VK_SAMPLE_COUNT_1_BIT,                                                 // VkSampleCountFlagBits samples;
3322             VK_IMAGE_TILING_OPTIMAL,                                               // VkImageTiling tiling;
3323             VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
3324             VK_SHARING_MODE_EXCLUSIVE,                                             // VkSharingMode sharingMode;
3325             1u,                                                                    // uint32_t queueFamilyIndexCount;
3326             &queueFamilyIndex,        // const uint32_t* pQueueFamilyIndices;
3327             VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
3328         };
3329 
3330         colorImage = createImage(vk, vkDevice, &colorImageParams);
3331 
3332         // Allocate and bind color image memory
3333         colorImageAlloc =
3334             memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
3335         VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
3336     }
3337 
3338     // Create color attachment view
3339     {
3340         const VkImageViewCreateInfo colorImageViewParams = {
3341             VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
3342             nullptr,                                  // const void* pNext;
3343             0u,                                       // VkImageViewCreateFlags flags;
3344             *colorImage,                              // VkImage image;
3345             VK_IMAGE_VIEW_TYPE_2D,                    // VkImageViewType viewType;
3346             colorFormat,                              // VkFormat format;
3347             {
3348                 VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
3349                 VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
3350                 VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
3351                 VK_COMPONENT_SWIZZLE_A  // VkComponentSwizzle a;
3352             },                          // VkComponentMapping components;
3353             {
3354                 VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
3355                 0u,                        // uint32_t baseMipLevel;
3356                 1u,                        // uint32_t levelCount;
3357                 0u,                        // uint32_t baseArrayLayer;
3358                 1u                         // uint32_t layerCount;
3359             }                              // VkImageSubresourceRange subresourceRange;
3360         };
3361 
3362         colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
3363     }
3364 
3365     // Create render pass
3366     {
3367         const VkAttachmentDescription colorAttachmentDescription = {
3368             0u,                                      // VkAttachmentDescriptionFlags flags;
3369             colorFormat,                             // VkFormat format;
3370             VK_SAMPLE_COUNT_1_BIT,                   // VkSampleCountFlagBits samples;
3371             VK_ATTACHMENT_LOAD_OP_CLEAR,             // VkAttachmentLoadOp loadOp;
3372             VK_ATTACHMENT_STORE_OP_STORE,            // VkAttachmentStoreOp storeOp;
3373             VK_ATTACHMENT_LOAD_OP_DONT_CARE,         // VkAttachmentLoadOp stencilLoadOp;
3374             VK_ATTACHMENT_STORE_OP_DONT_CARE,        // VkAttachmentStoreOp stencilStoreOp;
3375             VK_IMAGE_LAYOUT_UNDEFINED,               // VkImageLayout initialLayout;
3376             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout                finalLayout
3377         };
3378 
3379         const VkAttachmentDescription attachments[1] = {colorAttachmentDescription};
3380 
3381         const VkAttachmentReference colorAttachmentReference = {
3382             0u,                                      // uint32_t attachment;
3383             VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
3384         };
3385 
3386         const VkSubpassDescription subpassDescription = {
3387             0u,                              // VkSubpassDescriptionFlags flags;
3388             VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
3389             0u,                              // uint32_t inputAttachmentCount;
3390             nullptr,                         // const VkAttachmentReference* pInputAttachments;
3391             1u,                              // uint32_t colorAttachmentCount;
3392             &colorAttachmentReference,       // const VkAttachmentReference* pColorAttachments;
3393             nullptr,                         // const VkAttachmentReference* pResolveAttachments;
3394             nullptr,                         // const VkAttachmentReference* pDepthStencilAttachment;
3395             0u,                              // uint32_t preserveAttachmentCount;
3396             nullptr                          // const uint32_t* pPreserveAttachments;
3397         };
3398 
3399         const VkRenderPassCreateInfo renderPassParams = {
3400             VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
3401             nullptr,                                   // const void* pNext;
3402             0u,                                        // VkRenderPassCreateFlags flags;
3403             1u,                                        // uint32_t attachmentCount;
3404             attachments,                               // const VkAttachmentDescription* pAttachments;
3405             1u,                                        // uint32_t subpassCount;
3406             &subpassDescription,                       // const VkSubpassDescription* pSubpasses;
3407             0u,                                        // uint32_t dependencyCount;
3408             nullptr                                    // const VkSubpassDependency* pDependencies;
3409         };
3410 
3411         renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
3412     }
3413 
3414     // Create framebuffer
3415     {
3416         const VkFramebufferCreateInfo framebufferParams = {
3417             VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
3418             nullptr,                                   // const void* pNext;
3419             0u,                                        // VkFramebufferCreateFlags flags;
3420             *renderPass,                               // VkRenderPass renderPass;
3421             1u,                                        // uint32_t attachmentCount;
3422             &*colorImageView,                          // const VkImageView* pAttachments;
3423             (uint32_t)renderSize.x(),                  // uint32_t width;
3424             (uint32_t)renderSize.y(),                  // uint32_t height;
3425             1u                                         // uint32_t layers;
3426         };
3427 
3428         framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
3429     }
3430 
3431     // Create descriptors
3432     {
3433         DescriptorPoolBuilder descriptorPoolBuilder;
3434         DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;
3435 
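        // Two storage buffer bindings are declared up front, one for the output buffer and one
        // for the input buffer, even when the shader has no inputs; in that case the input
        // binding is simply left unwritten (see the descriptor update below).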
3436         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3437         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3438         descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
3439         descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
3440 
3441         descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
3442         descriptorPool =
3443             descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
3444 
3445         const VkDescriptorSetAllocateInfo allocInfo = {VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, nullptr,
3446                                                        *descriptorPool, 1u, &*descriptorSetLayout};
3447 
3448         descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
3449         // Update descriptors
3450         {
3451             DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
3452             const VkDescriptorBufferInfo outputDescriptorBufferInfo = {
3453                 *m_outputBuffer, // VkBuffer buffer;
3454                 0u,              // VkDeviceSize offset;
3455                 VK_WHOLE_SIZE    // VkDeviceSize range;
3456             };
3457 
3458             descriptorSetUpdateBuilder.writeSingle(
3459                 *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)OUTPUT_BUFFER_BINDING),
3460                 VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);
3461 
3462             VkDescriptorBufferInfo inputDescriptorBufferInfo = {
3463                 VK_NULL_HANDLE, // VkBuffer buffer;
3464                 0u,             // VkDeviceSize offset;
3465                 VK_WHOLE_SIZE   // VkDeviceSize range;
3466             };
3467 
3468             if (inputBufferSize > 0)
3469             {
3470                 inputDescriptorBufferInfo.buffer = *m_inputBuffer;
3471 
3472                 descriptorSetUpdateBuilder.writeSingle(
3473                     *descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((uint32_t)INPUT_BUFFER_BINDING),
3474                     VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
3475             }
3476 
3477             descriptorSetUpdateBuilder.update(vk, vkDevice);
3478         }
3479     }
3480 
3481     // Create pipeline layout
3482     {
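        // Set 0 is the executor's own input/output buffer set; set 1, used only when
        // m_extraResourcesLayout was provided, exposes the caller-supplied extra resources.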
3483         const VkDescriptorSetLayout descriptorSetLayouts[]    = {*descriptorSetLayout, m_extraResourcesLayout};
3484         const VkPipelineLayoutCreateInfo pipelineLayoutParams = {
3485             VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
3486             nullptr,                                       // const void* pNext;
3487             (VkPipelineLayoutCreateFlags)0,                // VkPipelineLayoutCreateFlags flags;
3488             numDescriptorSets,                             // uint32_t setLayoutCount;
3489             descriptorSetLayouts,                          // const VkDescriptorSetLayout* pSetLayouts;
3490             0u,                                            // uint32_t pushConstantRangeCount;
3491             nullptr                                        // const VkPushConstantRange* pPushConstantRanges;
3492         };
3493 
3494         pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
3495     }
3496 
3497     // Create shader modules
3498     {
3499         vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
3500         tessControlShaderModule =
3501             createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
3502         tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
3503         fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
3504     }
3505 
3506     // Create pipeline
3507     {
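        // No vertex attributes are consumed: the vertex input state is left empty and the
        // tessellation shaders fetch all test inputs from the input storage buffer instead.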
3508         const VkPipelineVertexInputStateCreateInfo vertexInputStateParams = {
3509             VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
3510             nullptr,                                                   // const void* pNext;
3511             (VkPipelineVertexInputStateCreateFlags)0,                  // VkPipelineVertexInputStateCreateFlags flags;
3512             0u,                                                        // uint32_t vertexBindingDescriptionCount;
3513             nullptr, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
3514             0u,      // uint32_t vertexAttributeDescriptionCount;
3515             nullptr, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
3516         };
3517 
3518         const std::vector<VkViewport> viewports(1, makeViewport(renderSize));
3519         const std::vector<VkRect2D> scissors(1, makeRect2D(renderSize));
3520 
3521         graphicsPipeline = makeGraphicsPipeline(
3522             vk,                       // const DeviceInterface&                        vk
3523             vkDevice,                 // const VkDevice                                device
3524             *pipelineLayout,          // const VkPipelineLayout                        pipelineLayout
3525             *vertexShaderModule,      // const VkShaderModule                          vertexShaderModule
3526             *tessControlShaderModule, // const VkShaderModule                          tessellationControlShaderModule
3527             *tessEvalShaderModule,    // const VkShaderModule                          tessellationEvalShaderModule
3528             VK_NULL_HANDLE,           // const VkShaderModule                          geometryShaderModule
3529             *fragmentShaderModule,    // const VkShaderModule                          fragmentShaderModule
3530             *renderPass,              // const VkRenderPass                            renderPass
3531             viewports,                // const std::vector<VkViewport>&                viewports
3532             scissors,                 // const std::vector<VkRect2D>&                  scissors
3533             VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology                     topology
3534             0u,                               // const uint32_t                                subpass
3535             patchControlPoints,               // const uint32_t                                patchControlPoints
3536             &vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
3537     }
3538 
3539     // Create command pool
3540     cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);
3541 
3542     // Create command buffer
3543     {
3544         const VkClearValue clearValue = getDefaultClearColor();
3545 
3546         cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
3547 
3548         beginCommandBuffer(vk, *cmdBuffer);
3549 
3550         beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()),
3551                         clearValue);
3552 
3553         vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);
3554 
3555         {
3556             const VkDescriptorSet descriptorSets[] = {*descriptorSet, extraResources};
3557             vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u,
3558                                      numDescriptorSets, descriptorSets, 0u, nullptr);
3559         }
3560 
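        // vertexCount vertices are assembled into patches of patchControlPoints vertices each;
        // the tessellation shaders compute their buffer index from gl_PrimitiveID.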
3561         vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);
3562 
3563         endRenderPass(vk, *cmdBuffer);
3564 
3565         // Insert a barrier so data written by the shader is available to the host
3566         {
3567             const VkBufferMemoryBarrier bufferBarrier = {
3568                 VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType    sType;
3569                 nullptr,                                 // const void*        pNext;
3570                 VK_ACCESS_SHADER_WRITE_BIT,              // VkAccessFlags      srcAccessMask;
3571                 VK_ACCESS_HOST_READ_BIT,                 // VkAccessFlags      dstAccessMask;
3572                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           srcQueueFamilyIndex;
3573                 VK_QUEUE_FAMILY_IGNORED,                 // uint32_t           dstQueueFamilyIndex;
3574                 *m_outputBuffer,                         // VkBuffer           buffer;
3575                 0,                                       // VkDeviceSize       offset;
3576                 VK_WHOLE_SIZE,                           // VkDeviceSize       size;
3577             };
3578 
3579             vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT,
3580                                   vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0, nullptr, 1, &bufferBarrier,
3581                                   0, nullptr);
3582         }
3583 
3584         endCommandBuffer(vk, *cmdBuffer);
3585     }
3586 
3587     // Execute Draw
3588     submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
3589 }
3590 
3591 // TessControlExecutor
3592 
3593 class TessControlExecutor : public TessellationExecutor
3594 {
3595 public:
3596     TessControlExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3597     virtual ~TessControlExecutor(void);
3598 
3599     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3600 
3601     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3602                          VkDescriptorSet extraResources);
3603 
3604 protected:
3605     static std::string generateTessControlShader(const ShaderSpec &shaderSpec);
3606 };
3607 
3608 TessControlExecutor::TessControlExecutor(Context &context, const ShaderSpec &shaderSpec,
3609                                          VkDescriptorSetLayout extraResourcesLayout)
3610     : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3611 {
3612 }
3613 
3614 TessControlExecutor::~TessControlExecutor(void)
3615 {
3616 }
3617 
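// Builds the tessellation control shader that evaluates the test expression. The emitted GLSL is
// roughly of the following shape (version directive, global declarations and generated buffer
// blocks elided):
//
//     layout(vertices = 1) out;
//     void main (void)
//     {
//         gl_TessLevelInner[0] = 1.0;  // ...all inner and outer levels set to 1.0
//         highp uint invocationId = uint(gl_PrimitiveID);
//         // read inputs[invocationId], evaluate the expression, write outputs[invocationId]
//     }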
3618 std::string TessControlExecutor::generateTessControlShader(const ShaderSpec &shaderSpec)
3619 {
3620     std::ostringstream src;
3621     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3622 
3623     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3624         src << "#extension GL_EXT_tessellation_shader : require\n\n";
3625 
3626     if (!shaderSpec.globalDeclarations.empty())
3627         src << shaderSpec.globalDeclarations << "\n";
3628 
3629     src << "\nlayout(vertices = 1) out;\n\n";
3630 
3631     declareBufferBlocks(src, shaderSpec);
3632 
3633     src << "void main (void)\n{\n";
3634 
3635     for (int ndx = 0; ndx < 2; ndx++)
3636         src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3637 
3638     for (int ndx = 0; ndx < 4; ndx++)
3639         src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3640 
3641     src << "\n"
3642         << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3643 
3644     generateExecBufferIo(src, shaderSpec, "invocationId");
3645 
3646     src << "}\n";
3647 
3648     return src.str();
3649 }
3650 
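// Minimal evaluation shader paired with the generated control shader above: it only forwards a
// position derived from gl_TessCoord so the pipeline is complete; all test work happens in the
// tessellation control stage.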
3651 static std::string generateEmptyTessEvalShader()
3652 {
3653     std::ostringstream src;
3654 
3655     src << "#version 450\n"
3656            "#extension GL_EXT_tessellation_shader : require\n\n";
3657 
3658     src << "layout(triangles, ccw) in;\n";
3659 
3660     src << "\nvoid main (void)\n{\n"
3661         << "\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
3662         << "}\n";
3663 
3664     return src.str();
3665 }
3666 
3667 void TessControlExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3668 {
3669     programCollection.glslSources.add("vert")
3670         << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3671     programCollection.glslSources.add("tess_control")
3672         << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
3673     programCollection.glslSources.add("tess_eval")
3674         << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
3675     programCollection.glslSources.add("frag")
3676         << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3677 }
3678 
3679 void TessControlExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3680                                   VkDescriptorSet extraResources)
3681 {
3682     const uint32_t patchSize = 3;
3683 
3684     initBuffers(numValues);
3685 
3686     // Set up input buffer & copy data
3687     uploadInputBuffer(inputs, numValues, false);
3688 
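    // Each value becomes one 3-vertex patch; with layout(vertices = 1) the control shader runs
    // once per patch and uses gl_PrimitiveID as its index into the I/O buffers.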
3689     renderTess(numValues, patchSize * numValues, patchSize, extraResources);
3690 
3691     // Read back data
3692     readOutputBuffer(outputs, numValues);
3693 }
3694 
3695 // TessEvaluationExecutor
3696 
3697 class TessEvaluationExecutor : public TessellationExecutor
3698 {
3699 public:
3700     TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
3701     virtual ~TessEvaluationExecutor(void);
3702 
3703     static void generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection);
3704 
3705     virtual void execute(int numValues, const void *const *inputs, void *const *outputs,
3706                          VkDescriptorSet extraResources);
3707 
3708 protected:
3709     static std::string generateTessEvalShader(const ShaderSpec &shaderSpec);
3710 };
3711 
3712 TessEvaluationExecutor::TessEvaluationExecutor(Context &context, const ShaderSpec &shaderSpec,
3713                                                VkDescriptorSetLayout extraResourcesLayout)
3714     : TessellationExecutor(context, shaderSpec, extraResourcesLayout)
3715 {
3716 }
3717 
3718 TessEvaluationExecutor::~TessEvaluationExecutor(void)
3719 {
3720 }
3721 
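// Pass-through control shader used by TessEvaluationExecutor: it emits a single-vertex patch with
// all tessellation levels set to 1.0, so each patch tessellates into exactly one isoline segment
// (two evaluation invocations).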
3722 static std::string generatePassthroughTessControlShader(void)
3723 {
3724     std::ostringstream src;
3725 
3726     src << "#version 450\n"
3727            "#extension GL_EXT_tessellation_shader : require\n\n";
3728 
3729     src << "layout(vertices = 1) out;\n\n";
3730 
3731     src << "void main (void)\n{\n";
3732 
3733     for (int ndx = 0; ndx < 2; ndx++)
3734         src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3735 
3736     for (int ndx = 0; ndx < 4; ndx++)
3737         src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3738 
3739     src << "}\n";
3740 
3741     return src.str();
3742 }
3743 
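// Builds the tessellation evaluation shader that evaluates the test expression. Each patch yields
// one isoline segment; its two invocations map to consecutive buffer indices computed as
// gl_PrimitiveID * 2 + (gl_TessCoord.x > 0.5 ? 1 : 0).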
3744 std::string TessEvaluationExecutor::generateTessEvalShader(const ShaderSpec &shaderSpec)
3745 {
3746     std::ostringstream src;
3747 
3748     src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3749 
3750     if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3751         src << "#extension GL_EXT_tessellation_shader : require\n\n";
3752 
3753     if (!shaderSpec.globalDeclarations.empty())
3754         src << shaderSpec.globalDeclarations << "\n";
3755 
3756     src << "\n";
3757 
3758     src << "layout(isolines, equal_spacing) in;\n\n";
3759 
3760     declareBufferBlocks(src, shaderSpec);
3761 
3762     src << "void main (void)\n{\n"
3763         << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3764         << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3765 
3766     generateExecBufferIo(src, shaderSpec, "invocationId");
3767 
3768     src << "}\n";
3769 
3770     return src.str();
3771 }
3772 
3773 void TessEvaluationExecutor::generateSources(const ShaderSpec &shaderSpec, SourceCollections &programCollection)
3774 {
3775     programCollection.glslSources.add("vert")
3776         << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
3777     programCollection.glslSources.add("tess_control")
3778         << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
3779     programCollection.glslSources.add("tess_eval")
3780         << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
3781     programCollection.glslSources.add("frag")
3782         << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
3783 }
3784 
3785 void TessEvaluationExecutor::execute(int numValues, const void *const *inputs, void *const *outputs,
3786                                      VkDescriptorSet extraResources)
3787 {
3788     const int patchSize     = 2;
3789     const int alignedValues = deAlign32(numValues, patchSize);
3790 
3791     // Initialize buffers with aligned value count to make room for padding
3792     initBuffers(alignedValues);
3793 
3794     // Set up input buffer & copy data
3795     uploadInputBuffer(inputs, numValues, false);
3796 
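    // alignedValues vertices with two control points per patch give alignedValues / 2 patches;
    // each patch yields two evaluation invocations, covering every value plus any padding slot.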
3797     renderTess((uint32_t)alignedValues, (uint32_t)alignedValues, (uint32_t)patchSize, extraResources);
3798 
3799     // Read back data
3800     readOutputBuffer(outputs, numValues);
3801 }
3802 
3803 } // namespace
3804 
3805 // ShaderExecutor
3806 
3807 ShaderExecutor::~ShaderExecutor(void)
3808 {
3809 }
3810 
3811 bool ShaderExecutor::areInputs16Bit(void) const
3812 {
3813     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3814          ++symIter)
3815     {
3816         if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3817             return true;
3818     }
3819     return false;
3820 }
3821 
3822 bool ShaderExecutor::areOutputs16Bit(void) const
3823 {
3824     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3825          ++symIter)
3826     {
3827         if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3828             return true;
3829     }
3830     return false;
3831 }
3832 
3833 bool ShaderExecutor::isOutput16Bit(const size_t ndx) const
3834 {
3835     if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3836         return true;
3837     return false;
3838 }
3839 
3840 bool ShaderExecutor::areInputs64Bit(void) const
3841 {
3842     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end();
3843          ++symIter)
3844     {
3845         if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3846             return true;
3847     }
3848     return false;
3849 }
3850 
3851 bool ShaderExecutor::areOutputs64Bit(void) const
3852 {
3853     for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end();
3854          ++symIter)
3855     {
3856         if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3857             return true;
3858     }
3859     return false;
3860 }
3861 
3862 bool ShaderExecutor::isOutput64Bit(const size_t ndx) const
3863 {
3864     if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3865         return true;
3866     return false;
3867 }
3868 
3869 // Utilities
3870 
3871 void generateSources(glu::ShaderType shaderType, const ShaderSpec &shaderSpec, vk::SourceCollections &dst)
3872 {
3873     switch (shaderType)
3874     {
3875     case glu::SHADERTYPE_VERTEX:
3876         VertexShaderExecutor::generateSources(shaderSpec, dst);
3877         break;
3878     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3879         TessControlExecutor::generateSources(shaderSpec, dst);
3880         break;
3881     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3882         TessEvaluationExecutor::generateSources(shaderSpec, dst);
3883         break;
3884     case glu::SHADERTYPE_GEOMETRY:
3885         GeometryShaderExecutor::generateSources(shaderSpec, dst);
3886         break;
3887     case glu::SHADERTYPE_FRAGMENT:
3888         FragmentShaderExecutor::generateSources(shaderSpec, dst);
3889         break;
3890     case glu::SHADERTYPE_COMPUTE:
3891         ComputeShaderExecutor::generateSources(shaderSpec, dst);
3892         break;
3893 #ifndef CTS_USES_VULKANSC
3894     case glu::SHADERTYPE_MESH:
3895         MeshTaskShaderExecutor::generateSources(shaderSpec, dst, false /*useTask*/);
3896         break;
3897     case glu::SHADERTYPE_TASK:
3898         MeshTaskShaderExecutor::generateSources(shaderSpec, dst, true /*useTask*/);
3899         break;
3900 #endif // CTS_USES_VULKANSC
3901     default:
3902         TCU_THROW(InternalError, "Unsupported shader type");
3903     }
3904 }
3905 
3906 ShaderExecutor *createExecutor(Context &context, glu::ShaderType shaderType, const ShaderSpec &shaderSpec,
3907                                VkDescriptorSetLayout extraResourcesLayout)
3908 {
3909     switch (shaderType)
3910     {
3911     case glu::SHADERTYPE_VERTEX:
3912         return new VertexShaderExecutor(context, shaderSpec, extraResourcesLayout);
3913     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3914         return new TessControlExecutor(context, shaderSpec, extraResourcesLayout);
3915     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3916         return new TessEvaluationExecutor(context, shaderSpec, extraResourcesLayout);
3917     case glu::SHADERTYPE_GEOMETRY:
3918         return new GeometryShaderExecutor(context, shaderSpec, extraResourcesLayout);
3919     case glu::SHADERTYPE_FRAGMENT:
3920         return new FragmentShaderExecutor(context, shaderSpec, extraResourcesLayout);
3921     case glu::SHADERTYPE_COMPUTE:
3922         return new ComputeShaderExecutor(context, shaderSpec, extraResourcesLayout);
3923 #ifndef CTS_USES_VULKANSC
3924     case glu::SHADERTYPE_MESH:
3925         return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3926     case glu::SHADERTYPE_TASK:
3927         return new MeshTaskShaderExecutor(context, shaderSpec, extraResourcesLayout);
3928 #endif // CTS_USES_VULKANSC
3929     default:
3930         TCU_THROW(InternalError, "Unsupported shader type");
3931     }
3932 }
3933 
3934 bool executorSupported(glu::ShaderType shaderType)
3935 {
3936     switch (shaderType)
3937     {
3938     case glu::SHADERTYPE_VERTEX:
3939     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3940     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3941     case glu::SHADERTYPE_GEOMETRY:
3942     case glu::SHADERTYPE_FRAGMENT:
3943     case glu::SHADERTYPE_COMPUTE:
3944     case glu::SHADERTYPE_MESH:
3945     case glu::SHADERTYPE_TASK:
3946         return true;
3947     default:
3948         return false;
3949     }
3950 }
3951 
3952 void checkSupportShader(Context &context, const glu::ShaderType shaderType)
3953 {
3954     // Stage support.
3955     switch (shaderType)
3956     {
3957     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3958     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3959         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
3960         break;
3961 
3962     case glu::SHADERTYPE_GEOMETRY:
3963         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
3964         break;
3965 
3966     case glu::SHADERTYPE_TASK:
3967     case glu::SHADERTYPE_MESH:
3968     {
3969         context.requireDeviceFunctionality("VK_EXT_mesh_shader");
3970 
3971         if (shaderType == glu::SHADERTYPE_TASK)
3972         {
3973 #ifndef CTS_USES_VULKANSC
3974             const auto &features = context.getMeshShaderFeaturesEXT();
3975             if (!features.taskShader)
3976                 TCU_THROW(NotSupportedError, "taskShader not supported");
3977 #else  // CTS_USES_VULKANSC
3978             TCU_THROW(NotSupportedError, "taskShader not supported");
3979 #endif // CTS_USES_VULKANSC
3980         }
3981     }
3982     break;
3983 
3984     default:
3985         break;
3986     }
3987 
3988     // Stores and atomic operation support.
3989     switch (shaderType)
3990     {
3991     case glu::SHADERTYPE_VERTEX:
3992     case glu::SHADERTYPE_TESSELLATION_CONTROL:
3993     case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3994     case glu::SHADERTYPE_GEOMETRY:
3995     case glu::SHADERTYPE_TASK:
3996     case glu::SHADERTYPE_MESH:
3997         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
3998         break;
3999     case glu::SHADERTYPE_FRAGMENT:
4000         context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
4001         break;
4002     case glu::SHADERTYPE_COMPUTE:
4003         break;
4004     default:
4005         DE_FATAL("Unsupported shader type");
4006         break;
4007     }
4008 
4009 #ifndef CTS_USES_VULKANSC
4010     if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
4011         context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
4012         !context.getPortabilitySubsetFeatures().tessellationIsolines)
4013     {
4014         TCU_THROW(NotSupportedError,
4015                   "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
4016     }
4017 #endif // CTS_USES_VULKANSC
4018 }
4019 
4020 } // namespace shaderexecutor
4021 } // namespace vkt
4022