1 /*------------------------------------------------------------------------
2 * Vulkan Conformance Tests
3 * ------------------------
4 *
5 * Copyright (c) 2015 The Khronos Group Inc.
6 * Copyright (c) 2015 Samsung Electronics Co., Ltd.
7 * Copyright (c) 2016 The Android Open Source Project
8 *
9 * Licensed under the Apache License, Version 2.0 (the "License");
10 * you may not use this file except in compliance with the License.
11 * You may obtain a copy of the License at
12 *
13 * http://www.apache.org/licenses/LICENSE-2.0
14 *
15 * Unless required by applicable law or agreed to in writing, software
16 * distributed under the License is distributed on an "AS IS" BASIS,
17 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 * See the License for the specific language governing permissions and
19 * limitations under the License.
20 *
21 *//*!
22 * \file
23 * \brief Vulkan ShaderExecutor
24 *//*--------------------------------------------------------------------*/
25
26 #include "vktShaderExecutor.hpp"
27
28 #include "vkMemUtil.hpp"
29 #include "vkRef.hpp"
30 #include "vkPrograms.hpp"
31 #include "vkRefUtil.hpp"
32 #include "vkTypeUtil.hpp"
33 #include "vkQueryUtil.hpp"
34 #include "vkBuilderUtil.hpp"
35 #include "vkCmdUtil.hpp"
36 #include "vkObjUtil.hpp"
37 #include "vkBarrierUtil.hpp"
38
39 #include "gluShaderUtil.hpp"
40
41 #include "tcuVector.hpp"
42 #include "tcuTestLog.hpp"
43 #include "tcuTextureUtil.hpp"
44
45 #include "deUniquePtr.hpp"
46 #include "deStringUtil.hpp"
47 #include "deSharedPtr.hpp"
48 #include "deFloat16.h"
49
50 #include <map>
51 #include <sstream>
52 #include <iostream>
53
54 using std::vector;
55 using namespace vk;
56
57 namespace vkt
58 {
59 namespace shaderexecutor
60 {
61 namespace
62 {
63
// Default render-target dimensions used by the fragment-based executors.
enum
{
	DEFAULT_RENDER_WIDTH	= 100,
	DEFAULT_RENDER_HEIGHT	= 100,
};
69
70 // Common typedefs
71
// Shared-ownership wrappers so Vulkan objects and their memory allocations can
// be stored in containers and released automatically with the executor.
typedef de::SharedPtr<Unique<VkImage> >		VkImageSp;
typedef de::SharedPtr<Unique<VkImageView> >	VkImageViewSp;
typedef de::SharedPtr<Unique<VkBuffer> >	VkBufferSp;
typedef de::SharedPtr<Allocation>			AllocationSp;

// Maps a GLSL data type to the VkFormat used to feed it as a vertex attribute
// (defined later in this file).
static VkFormat getAttributeFormat (const glu::DataType dataType);
78
79 // Shader utilities
80
getDefaultClearColor(void)81 static VkClearValue getDefaultClearColor (void)
82 {
83 return makeClearValueColorF32(0.125f, 0.25f, 0.5f, 1.0f);
84 }
85
// Builds GLSL source for a minimal fragment shader whose single color output
// is a constant vec4(0.0); used when fragment output values are irrelevant.
static std::string generateEmptyFragmentSource (void)
{
	std::ostringstream frag;

	frag << "#version 450\n"
		 << "layout(location=0) out highp vec4 o_color;\n"
		 << "void main (void)\n{\n"
		 << "\to_color = vec4(0.0);\n"
		 << "}\n";

	return frag.str();
}
99
packFloat16Bit(std::ostream & src,const std::vector<Symbol> & outputs)100 void packFloat16Bit (std::ostream& src, const std::vector<Symbol>& outputs)
101 {
102 for (vector<Symbol>::const_iterator symIter = outputs.begin(); symIter != outputs.end(); ++symIter)
103 {
104 if(glu::isDataTypeFloatType(symIter->varType.getBasicType()))
105 {
106 if(glu::isDataTypeVector(symIter->varType.getBasicType()))
107 {
108 for(int i = 0; i < glu::getDataTypeScalarSize(symIter->varType.getBasicType()); i++)
109 {
110 src << "\tpacked_" << symIter->name << "[" << i << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "], -1.0)));\n";
111 }
112 }
113 else if (glu::isDataTypeMatrix(symIter->varType.getBasicType()))
114 {
115 int maxRow = 0;
116 int maxCol = 0;
117 switch (symIter->varType.getBasicType())
118 {
119 case glu::TYPE_FLOAT_MAT2:
120 maxRow = maxCol = 2;
121 break;
122 case glu::TYPE_FLOAT_MAT2X3:
123 maxRow = 2;
124 maxCol = 3;
125 break;
126 case glu::TYPE_FLOAT_MAT2X4:
127 maxRow = 2;
128 maxCol = 4;
129 break;
130 case glu::TYPE_FLOAT_MAT3X2:
131 maxRow = 3;
132 maxCol = 2;
133 break;
134 case glu::TYPE_FLOAT_MAT3:
135 maxRow = maxCol = 3;
136 break;
137 case glu::TYPE_FLOAT_MAT3X4:
138 maxRow = 3;
139 maxCol = 4;
140 break;
141 case glu::TYPE_FLOAT_MAT4X2:
142 maxRow = 4;
143 maxCol = 2;
144 break;
145 case glu::TYPE_FLOAT_MAT4X3:
146 maxRow = 4;
147 maxCol = 3;
148 break;
149 case glu::TYPE_FLOAT_MAT4:
150 maxRow = maxCol = 4;
151 break;
152 default:
153 DE_ASSERT(false);
154 break;
155 }
156
157 for(int i = 0; i < maxRow; i++)
158 for(int j = 0; j < maxCol; j++)
159 {
160 src << "\tpacked_" << symIter->name << "[" << i << "][" << j << "] = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << "[" << i << "][" << j << "], -1.0)));\n";
161 }
162 }
163 else
164 {
165 src << "\tpacked_" << symIter->name << " = uintBitsToFloat(packFloat2x16(f16vec2(" << symIter->name << ", -1.0)));\n";
166 }
167 }
168 }
169 }
170
generatePassthroughVertexShader(const ShaderSpec & shaderSpec,const char * inputPrefix,const char * outputPrefix)171 static std::string generatePassthroughVertexShader (const ShaderSpec& shaderSpec, const char* inputPrefix, const char* outputPrefix)
172 {
173 std::ostringstream src;
174 int location = 0;
175
176 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
177
178 if (!shaderSpec.globalDeclarations.empty())
179 src << shaderSpec.globalDeclarations << "\n";
180
181 src << "layout(location = " << location << ") in highp vec4 a_position;\n";
182
183 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
184 {
185 location++;
186 src << "layout(location = "<< location << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n"
187 << "layout(location = " << location - 1 << ") flat out " << glu::declare(input->varType, outputPrefix + input->name) << ";\n";
188 }
189
190 src << "\nvoid main (void)\n{\n"
191 << " gl_Position = a_position;\n"
192 << " gl_PointSize = 1.0;\n";
193
194 for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
195 src << "\t" << outputPrefix << input->name << " = " << inputPrefix << input->name << ";\n";
196
197 src << "}\n";
198
199 return src.str();
200 }
201
// Generates the vertex shader used when the vertex stage itself executes the
// operation: inputs are fetched from vertex attributes into plain locals,
// shaderSpec.source is run, and results are forwarded to the next stage
// through flat outputs. Bool outputs are carried as highp (i)vec ints since
// booleans cannot be interface variables. With shaderSpec.packFloat16Bit,
// float values are processed at float16 precision and re-encoded into 32-bit
// "packed_" carriers (see packFloat16Bit).
static std::string generateVertexShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	// Location 0 is reserved for the position attribute; user inputs follow.
	src << "layout(location = 0) in highp vec4 a_position;\n";

	int locationNumber = 1;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Booleans cross the interface as highp ints of matching width.
			const int			vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType	intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = a_position;\n"
		<< "	gl_PointSize = 1.0;\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// 16-bit mode: convert the 32-bit attribute to a float16 local so
			// the operation runs at reduced precision.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare local output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// 16-bit mode: the operation writes the float16 local; "packed_" is
			// the 32-bit carrier filled by packFloat16Bit() afterwards.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	if (shaderSpec.packFloat16Bit)
		packFloat16Bit(src, shaderSpec.outputs);

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// 16-bit mode: forward the packed 32-bit carrier.
			src << "\t" << outputPrefix << output->name << " = packed_" << output->name << ";\n";
		}
		else
		{
			if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
			{
				// Convert booleans to the int type declared on the interface.
				const int			vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
				const glu::DataType	intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

				src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
			}
			else
				src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
		}
	}

	src << "}\n";

	return src.str();
}
307
//! Mapping between output symbols and fragment shader color attachment
//! locations. A symbol may span several consecutive locations (e.g. one per
//! matrix column), in which case it appears multiple times in locationSymbols.
struct FragmentOutputLayout
{
	std::vector<const Symbol*>	locationSymbols;	//! Symbols by location
	std::map<std::string, int>	locationMap;		//! Map from symbol name to start location
};
313
// Declares the fragment shader color outputs for each symbol of the shader
// spec, at the locations recorded in outLocationMap. Types that cannot be
// written to color attachments directly are remapped:
//  - floats with useIntOutputs become uints (values bit-cast on assignment),
//  - bools become highp ints,
//  - matrices are split into one uvec per column at consecutive locations,
//    named <outVarName>_<colNdx>.
static void generateFragShaderOutputDecl (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& outputPrefix)
{
	for (int outNdx = 0; outNdx < (int)shaderSpec.outputs.size(); ++outNdx)
	{
		const Symbol&				output		= shaderSpec.outputs[outNdx];
		const int					location	= de::lookup(outLocationMap, output.name);
		const std::string			outVarName	= outputPrefix + output.name;
		glu::VariableDeclaration	decl		(output.varType, outVarName, glu::STORAGE_OUT, glu::INTERPOLATION_LAST, glu::Layout(location));

		TCU_CHECK_INTERNAL(output.varType.isBasicType());

		if (useIntOutputs && glu::isDataTypeFloatOrVec(output.varType.getBasicType()))
		{
			// Float result read back as raw bits in an unsigned output.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= vecSize > 1 ? glu::getDataTypeUintVec(vecSize) : glu::TYPE_UINT;
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeBoolOrBVec(output.varType.getBasicType()))
		{
			// Booleans are not renderable; declare signed int outputs instead.
			const int			vecSize			= glu::getDataTypeScalarSize(output.varType.getBasicType());
			const glu::DataType	intBasicType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType			(intBasicType, glu::PRECISION_HIGHP);

			decl.varType = intType;
			src << decl << ";\n";
		}
		else if (glu::isDataTypeMatrix(output.varType.getBasicType()))
		{
			// One uvec output per column; the vector length is the column
			// height (number of matrix rows).
			const int			vecSize			= glu::getDataTypeMatrixNumRows(output.varType.getBasicType());
			const int			numVecs			= glu::getDataTypeMatrixNumColumns(output.varType.getBasicType());
			const glu::DataType	uintBasicType	= glu::getDataTypeUintVec(vecSize);
			const glu::VarType	uintType		(uintBasicType, glu::PRECISION_HIGHP);

			decl.varType = uintType;
			for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
			{
				decl.name				= outVarName + "_" + de::toString(vecNdx);
				decl.layout.location	= location + vecNdx;
				src << decl << ";\n";
			}
		}
		else
			src << decl << ";\n";
	}
}
362
generateFragShaderOutAssign(std::ostream & src,const ShaderSpec & shaderSpec,bool useIntOutputs,const std::string & valuePrefix,const std::string & outputPrefix,const bool isInput16Bit=false)363 static void generateFragShaderOutAssign (std::ostream& src, const ShaderSpec& shaderSpec, bool useIntOutputs, const std::string& valuePrefix, const std::string& outputPrefix, const bool isInput16Bit = false)
364 {
365 if (isInput16Bit)
366 packFloat16Bit(src, shaderSpec.outputs);
367
368 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
369 {
370 const std::string packPrefix = (isInput16Bit && glu::isDataTypeFloatType(output->varType.getBasicType())) ? "packed_" : "";
371
372 if (useIntOutputs && glu::isDataTypeFloatOrVec(output->varType.getBasicType()))
373 src << " o_" << output->name << " = floatBitsToUint(" << valuePrefix << output->name << ");\n";
374 else if (glu::isDataTypeMatrix(output->varType.getBasicType()))
375 {
376 const int numVecs = glu::getDataTypeMatrixNumColumns(output->varType.getBasicType());
377
378 for (int vecNdx = 0; vecNdx < numVecs; ++vecNdx)
379 if (useIntOutputs)
380 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = floatBitsToUint(" << valuePrefix << output->name << "[" << vecNdx << "]);\n";
381 else
382 src << "\t" << outputPrefix << output->name << "_" << vecNdx << " = " << packPrefix << valuePrefix << output->name << "[" << vecNdx << "];\n";
383 }
384 else if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
385 {
386 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
387 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
388
389 src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << valuePrefix << output->name << ");\n";
390 }
391 else
392 src << "\t" << outputPrefix << output->name << " = " << packPrefix << valuePrefix << output->name << ";\n";
393 }
394 }
395
generatePassthroughFragmentShader(const ShaderSpec & shaderSpec,bool useIntOutputs,const std::map<std::string,int> & outLocationMap,const std::string & inputPrefix,const std::string & outputPrefix)396 static std::string generatePassthroughFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
397 {
398 std::ostringstream src;
399
400 src <<"#version 450\n";
401
402 if (!shaderSpec.globalDeclarations.empty())
403 src << shaderSpec.globalDeclarations << "\n";
404
405 int locationNumber = 0;
406 for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
407 {
408 if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
409 {
410 const int vecSize = glu::getDataTypeScalarSize(output->varType.getBasicType());
411 const glu::DataType intBaseType = vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
412 const glu::VarType intType (intBaseType, glu::PRECISION_HIGHP);
413
414 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(intType, inputPrefix + output->name) << ";\n";
415 }
416 else
417 src << "layout(location = " << locationNumber << ") flat in " << glu::declare(output->varType, inputPrefix + output->name) << ";\n";
418 }
419
420 generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);
421
422 src << "\nvoid main (void)\n{\n";
423
424 generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, inputPrefix, outputPrefix);
425
426 src << "}\n";
427
428 return src.str();
429 }
430
// Generates the geometry shader used when the geometry stage executes the
// operation: reads the flat inputs from the single incoming point, runs
// shaderSpec.source, and emits exactly one point carrying the results through
// flat outputs. gl_PointSize is forwarded only when the implementation
// supports writing it from the geometry stage.
static std::string generateGeometryShader (const ShaderSpec& shaderSpec, const std::string& inputPrefix, const std::string& outputPrefix, const bool pointSizeSupported)
{
	DE_ASSERT(!inputPrefix.empty() && !outputPrefix.empty());

	std::ostringstream src;

	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";

	// GLSL ES 3.1 needs the geometry shader extension explicitly.
	if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
		src << "#extension GL_EXT_geometry_shader : require\n";

	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	// One point in, one point out.
	src << "layout(points) in;\n"
		<< "layout(points, max_vertices = 1) out;\n";

	// Geometry inputs are arrays (one entry per vertex of the primitive).
	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << "[];\n";

	locationNumber = 0;
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output, ++locationNumber)
	{
		DE_ASSERT(output->varType.isBasicType());

		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Booleans cross the interface as highp ints of matching width.
			const int			vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType	intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;
			const glu::VarType	intType		(intBaseType, glu::PRECISION_HIGHP);

			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(intType, outputPrefix + output->name) << ";\n";
		}
		else
			src << "layout(location = " << locationNumber << ") flat out " << glu::declare(output->varType, outputPrefix + output->name) << ";\n";
	}

	src << "\n"
		<< "void main (void)\n"
		<< "{\n"
		<< "	gl_Position = gl_in[0].gl_Position;\n"
		<< (pointSizeSupported ? "	gl_PointSize = gl_in[0].gl_PointSize;\n\n" : "");

	// Fetch input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
		src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << "[0];\n";

	// Declare local output variables.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
		src << "\t" << glu::declare(output->varType, output->name) << ";\n";

	src << "\n";

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Assignments to outputs.
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (glu::isDataTypeBoolOrBVec(output->varType.getBasicType()))
		{
			// Convert booleans to the int type declared on the interface.
			const int			vecSize		= glu::getDataTypeScalarSize(output->varType.getBasicType());
			const glu::DataType	intBaseType	= vecSize > 1 ? glu::getDataTypeIntVec(vecSize) : glu::TYPE_INT;

			src << "\t" << outputPrefix << output->name << " = " << glu::getDataTypeName(intBaseType) << "(" << output->name << ");\n";
		}
		else
			src << "\t" << outputPrefix << output->name << " = " << output->name << ";\n";
	}

	src << "	EmitVertex();\n"
		<< "	EndPrimitive();\n"
		<< "}\n";

	return src.str();
}
514
// Generates the fragment shader used when the fragment stage executes the
// operation: flat inputs are fetched into locals, shaderSpec.source is run,
// and the results are written to color outputs via generateFragShaderOutAssign
// (with the type remapping declared by generateFragShaderOutputDecl). With
// shaderSpec.packFloat16Bit, float values are processed at float16 precision
// and re-encoded into 32-bit "packed_" carriers before assignment.
static std::string generateFragmentShader (const ShaderSpec& shaderSpec, bool useIntOutputs, const std::map<std::string, int>& outLocationMap, const std::string& inputPrefix, const std::string& outputPrefix)
{
	std::ostringstream src;
	src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
	if (!shaderSpec.globalDeclarations.empty())
		src << shaderSpec.globalDeclarations << "\n";

	int locationNumber = 0;
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input, ++locationNumber)
	{
		src << "layout(location = " << locationNumber << ") flat in " << glu::declare(input->varType, inputPrefix + input->name) << ";\n";
	}

	generateFragShaderOutputDecl(src, shaderSpec, useIntOutputs, outLocationMap, outputPrefix);

	src << "\nvoid main (void)\n{\n";

	// Declare & fetch local input variables
	for (vector<Symbol>::const_iterator input = shaderSpec.inputs.begin(); input != shaderSpec.inputs.end(); ++input)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(input->varType.getBasicType()))
		{
			// 16-bit mode: convert the 32-bit input to a float16 local so the
			// operation runs at reduced precision.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(input->varType.getBasicType()));
			src << "\t" << tname << " " << input->name << " = " << tname << "(" << inputPrefix << input->name << ");\n";
		}
		else
			src << "\t" << glu::declare(input->varType, input->name) << " = " << inputPrefix << input->name << ";\n";
	}

	// Declare output variables
	for (vector<Symbol>::const_iterator output = shaderSpec.outputs.begin(); output != shaderSpec.outputs.end(); ++output)
	{
		if (shaderSpec.packFloat16Bit && isDataTypeFloatOrVec(output->varType.getBasicType()))
		{
			// 16-bit mode: the operation writes the float16 local; "packed_" is
			// the 32-bit carrier filled by packFloat16Bit() afterwards.
			const std::string tname = glu::getDataTypeName(getDataTypeFloat16Scalars(output->varType.getBasicType()));
			src << "\t" << tname << " " << output->name << ";\n";
			const char* tname2 = glu::getDataTypeName(output->varType.getBasicType());
			src << "\t" << tname2 << " " << "packed_" << output->name << ";\n";
		}
		else
			src << "\t" << glu::declare(output->varType, output->name) << ";\n";
	}

	// Operation - indented to correct level.
	{
		std::istringstream	opSrc	(shaderSpec.source);
		std::string			line;

		while (std::getline(opSrc, line))
			src << "\t" << line << "\n";
	}

	// Locals are read directly (empty value prefix).
	generateFragShaderOutAssign(src, shaderSpec, useIntOutputs, "", outputPrefix, shaderSpec.packFloat16Bit);

	src << "}\n";

	return src.str();
}
573
574 // FragmentOutExecutor
575
// FragmentOutExecutor
//
// Executes the shader spec through a graphics pipeline and reads results back
// from fragment outputs; each input value is fed in as vertex attribute data.
class FragmentOutExecutor : public ShaderExecutor
{
public:
						FragmentOutExecutor		(Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~FragmentOutExecutor	(void);

	// Evaluates numValues invocations; inputs/outputs are arrays of per-symbol
	// data pointers in shader spec order.
	virtual void		execute					(int numValues,
												 const void* const* inputs,
												 void* const* outputs,
												 VkDescriptorSet extraResources);

protected:
	const glu::ShaderType		m_shaderType;		// Stage that runs the operation.
	const FragmentOutputLayout	m_outputLayout;		// Output symbol -> attachment location mapping.

private:
	// Creates one vertex buffer + binding/attribute description per input
	// symbol (plus position) for the given number of values.
	void				bindAttributes			(int numValues,
												 const void* const* inputs);

	// Adds a single vertex attribute backed by a freshly allocated
	// host-visible buffer filled from dataPtr.
	void				addAttribute			(deUint32 bindingLocation,
												 VkFormat format,
												 deUint32 sizePerElement,
												 deUint32 count,
												 const void* dataPtr);
	// reinit render data members
	virtual void		clearRenderData			(void);

	const VkDescriptorSetLayout	m_extraResourcesLayout;	// Layout of the caller-provided extra resources set.

	std::vector<VkVertexInputBindingDescription>	m_vertexBindingDescriptions;
	std::vector<VkVertexInputAttributeDescription>	m_vertexAttributeDescriptions;
	std::vector<VkBufferSp>							m_vertexBuffers;
	std::vector<AllocationSp>						m_vertexBufferAllocs;
};
610
computeFragmentOutputLayout(const std::vector<Symbol> & symbols)611 static FragmentOutputLayout computeFragmentOutputLayout (const std::vector<Symbol>& symbols)
612 {
613 FragmentOutputLayout ret;
614 int location = 0;
615
616 for (std::vector<Symbol>::const_iterator it = symbols.begin(); it != symbols.end(); ++it)
617 {
618 const int numLocations = glu::getDataTypeNumLocations(it->varType.getBasicType());
619
620 TCU_CHECK_INTERNAL(!de::contains(ret.locationMap, it->name));
621 de::insert(ret.locationMap, it->name, location);
622 location += numLocations;
623
624 for (int ndx = 0; ndx < numLocations; ++ndx)
625 ret.locationSymbols.push_back(&*it);
626 }
627
628 return ret;
629 }
630
// Constructs the executor and verifies up front that every input's attribute
// format can be used as a vertex buffer format on this device; throws
// NotSupportedError otherwise.
FragmentOutExecutor::FragmentOutExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: ShaderExecutor			(context, shaderSpec)
	, m_shaderType				(shaderType)
	, m_outputLayout			(computeFragmentOutputLayout(m_shaderSpec.outputs))
	, m_extraResourcesLayout	(extraResourcesLayout)
{
	const VkPhysicalDevice		physicalDevice	= m_context.getPhysicalDevice();
	const InstanceInterface&	vki				= m_context.getInstanceInterface();

	// Input attributes
	for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
	{
		const Symbol&				symbol				= m_shaderSpec.inputs[inputNdx];
		const glu::DataType			basicType			= symbol.varType.getBasicType();
		const VkFormat				format				= getAttributeFormat(basicType);
		const VkFormatProperties	formatProperties	= getPhysicalDeviceFormatProperties(vki, physicalDevice, format);

		// Vertex buffer usage of the format is optional; bail out early if the
		// device cannot consume the inputs.
		if ((formatProperties.bufferFeatures & VK_FORMAT_FEATURE_VERTEX_BUFFER_BIT) == 0)
			TCU_THROW(NotSupportedError, "format not supported by device as vertex buffer attribute format");
	}
}
651
// All resources are held via RAII wrappers (Unique<>/SharedPtr/AllocationSp)
// and released by their destructors; nothing to do explicitly.
FragmentOutExecutor::~FragmentOutExecutor (void)
{
}
655
computeVertexPositions(int numValues,const tcu::IVec2 & renderSize)656 static std::vector<tcu::Vec2> computeVertexPositions (int numValues, const tcu::IVec2& renderSize)
657 {
658 std::vector<tcu::Vec2> positions(numValues);
659 for (int valNdx = 0; valNdx < numValues; valNdx++)
660 {
661 const int ix = valNdx % renderSize.x();
662 const int iy = valNdx / renderSize.x();
663 const float fx = -1.0f + 2.0f*((float(ix) + 0.5f) / float(renderSize.x()));
664 const float fy = -1.0f + 2.0f*((float(iy) + 0.5f) / float(renderSize.y()));
665
666 positions[valNdx] = tcu::Vec2(fx, fy);
667 }
668
669 return positions;
670 }
671
// Selects the tcu::TextureFormat used to read back a fragment output of the
// given type. Three-component values are widened to RGBA (no renderable RGB
// formats), booleans are read back as signed 32-bit ints, and with
// useIntOutputs float results are read back as their raw bits in 32-bit uints
// (matching the bit-casts emitted by generateFragShaderOutAssign).
static tcu::TextureFormat getRenderbufferFormatForOutput (const glu::VarType& outputType, bool useIntOutputs)
{
	// Indexed by component count - 1.
	const tcu::TextureFormat::ChannelOrder channelOrderMap[] =
	{
		tcu::TextureFormat::R,
		tcu::TextureFormat::RG,
		tcu::TextureFormat::RGBA,	// No RGB variants available.
		tcu::TextureFormat::RGBA
	};

	const glu::DataType				basicType	= outputType.getBasicType();
	const int						numComps	= glu::getDataTypeNumComponents(basicType);
	tcu::TextureFormat::ChannelType	channelType;

	switch (glu::getDataTypeScalarType(basicType))
	{
		case glu::TYPE_UINT:	channelType = tcu::TextureFormat::UNSIGNED_INT32;	break;
		case glu::TYPE_INT:		channelType = tcu::TextureFormat::SIGNED_INT32;		break;
		case glu::TYPE_BOOL:	channelType = tcu::TextureFormat::SIGNED_INT32;		break;
		case glu::TYPE_FLOAT:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::FLOAT;		break;
		case glu::TYPE_FLOAT16:	channelType = useIntOutputs ? tcu::TextureFormat::UNSIGNED_INT32 : tcu::TextureFormat::HALF_FLOAT;	break;
		default:
			throw tcu::InternalError("Invalid output type");
	}

	DE_ASSERT(de::inRange<int>(numComps, 1, DE_LENGTH_OF_ARRAY(channelOrderMap)));

	return tcu::TextureFormat(channelOrderMap[numComps-1], channelType);
}
701
getAttributeFormat(const glu::DataType dataType)702 static VkFormat getAttributeFormat (const glu::DataType dataType)
703 {
704 switch (dataType)
705 {
706 case glu::TYPE_FLOAT16: return VK_FORMAT_R16_SFLOAT;
707 case glu::TYPE_FLOAT16_VEC2: return VK_FORMAT_R16G16_SFLOAT;
708 case glu::TYPE_FLOAT16_VEC3: return VK_FORMAT_R16G16B16_SFLOAT;
709 case glu::TYPE_FLOAT16_VEC4: return VK_FORMAT_R16G16B16A16_SFLOAT;
710
711 case glu::TYPE_FLOAT: return VK_FORMAT_R32_SFLOAT;
712 case glu::TYPE_FLOAT_VEC2: return VK_FORMAT_R32G32_SFLOAT;
713 case glu::TYPE_FLOAT_VEC3: return VK_FORMAT_R32G32B32_SFLOAT;
714 case glu::TYPE_FLOAT_VEC4: return VK_FORMAT_R32G32B32A32_SFLOAT;
715
716 case glu::TYPE_INT: return VK_FORMAT_R32_SINT;
717 case glu::TYPE_INT_VEC2: return VK_FORMAT_R32G32_SINT;
718 case glu::TYPE_INT_VEC3: return VK_FORMAT_R32G32B32_SINT;
719 case glu::TYPE_INT_VEC4: return VK_FORMAT_R32G32B32A32_SINT;
720
721 case glu::TYPE_UINT: return VK_FORMAT_R32_UINT;
722 case glu::TYPE_UINT_VEC2: return VK_FORMAT_R32G32_UINT;
723 case glu::TYPE_UINT_VEC3: return VK_FORMAT_R32G32B32_UINT;
724 case glu::TYPE_UINT_VEC4: return VK_FORMAT_R32G32B32A32_UINT;
725
726 case glu::TYPE_FLOAT_MAT2: return VK_FORMAT_R32G32_SFLOAT;
727 case glu::TYPE_FLOAT_MAT2X3: return VK_FORMAT_R32G32B32_SFLOAT;
728 case glu::TYPE_FLOAT_MAT2X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
729 case glu::TYPE_FLOAT_MAT3X2: return VK_FORMAT_R32G32_SFLOAT;
730 case glu::TYPE_FLOAT_MAT3: return VK_FORMAT_R32G32B32_SFLOAT;
731 case glu::TYPE_FLOAT_MAT3X4: return VK_FORMAT_R32G32B32A32_SFLOAT;
732 case glu::TYPE_FLOAT_MAT4X2: return VK_FORMAT_R32G32_SFLOAT;
733 case glu::TYPE_FLOAT_MAT4X3: return VK_FORMAT_R32G32B32_SFLOAT;
734 case glu::TYPE_FLOAT_MAT4: return VK_FORMAT_R32G32B32A32_SFLOAT;
735 default:
736 DE_ASSERT(false);
737 return VK_FORMAT_UNDEFINED;
738 }
739 }
740
// Creates a host-visible vertex buffer holding 'count' elements of
// 'sizePerElement' bytes copied from dataPtr, and registers a new vertex
// binding + attribute description for it at 'bindingLocation'. The buffer and
// its allocation are kept alive in member containers for the executor's
// lifetime.
void FragmentOutExecutor::addAttribute (deUint32 bindingLocation, VkFormat format, deUint32 sizePerElement, deUint32 count, const void* dataPtr)
{
	// Portability requires stride to be multiply of minVertexInputBindingStrideAlignment
	// this value is usually 4 and current tests meet this requirement but
	// if this changes in future then this limit should be verified in checkSupport
#ifndef CTS_USES_VULKANSC
	if (m_context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
		((sizePerElement % m_context.getPortabilitySubsetProperties().minVertexInputBindingStrideAlignment) != 0))
	{
		DE_FATAL("stride is not multiply of minVertexInputBindingStrideAlignment");
	}
#endif // CTS_USES_VULKANSC

	// Add binding specification; each attribute gets a dedicated binding, so
	// the binding index is simply the next free slot.
	const deUint32 binding = (deUint32)m_vertexBindingDescriptions.size();
	const VkVertexInputBindingDescription bindingDescription =
	{
		binding,
		sizePerElement,					// stride == element size: one element per vertex
		VK_VERTEX_INPUT_RATE_VERTEX
	};

	m_vertexBindingDescriptions.push_back(bindingDescription);

	// Add location and format specification
	const VkVertexInputAttributeDescription attributeDescription =
	{
		bindingLocation,				// deUint32	location;
		binding,						// deUint32	binding;
		format,							// VkFormat	format;
		0u,								// deUint32	offsetInBytes;
	};

	m_vertexAttributeDescriptions.push_back(attributeDescription);

	// Upload data to buffer
	const VkDevice			vkDevice			= m_context.getDevice();
	const DeviceInterface&	vk					= m_context.getDeviceInterface();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();

	const VkDeviceSize		inputSize			= sizePerElement * count;
	const VkBufferCreateInfo vertexBufferParams =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
		DE_NULL,								// const void*			pNext;
		0u,										// VkBufferCreateFlags	flags;
		inputSize,								// VkDeviceSize			size;
		VK_BUFFER_USAGE_VERTEX_BUFFER_BIT,		// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
		1u,										// deUint32				queueFamilyCount;
		&queueFamilyIndex						// const deUint32*		pQueueFamilyIndices;
	};

	Move<VkBuffer>			buffer	= createBuffer(vk, vkDevice, &vertexBufferParams);
	de::MovePtr<Allocation>	alloc	= m_context.getDefaultAllocator().allocate(getBufferMemoryRequirements(vk, vkDevice, *buffer), MemoryRequirement::HostVisible);

	VK_CHECK(vk.bindBufferMemory(vkDevice, *buffer, alloc->getMemory(), alloc->getOffset()));

	deMemcpy(alloc->getHostPtr(), dataPtr, (size_t)inputSize);
	// Make the host writes visible to the device (memory may be non-coherent).
	flushAlloc(vk, vkDevice, *alloc);

	// Transfer ownership to the member containers so the objects outlive this call.
	m_vertexBuffers.push_back(de::SharedPtr<Unique<VkBuffer> >(new Unique<VkBuffer>(buffer)));
	m_vertexBufferAllocs.push_back(AllocationSp(alloc.release()));
}
805
bindAttributes(int numValues,const void * const * inputs)806 void FragmentOutExecutor::bindAttributes (int numValues, const void* const* inputs)
807 {
808 // Input attributes
809 for (int inputNdx = 0; inputNdx < (int)m_shaderSpec.inputs.size(); inputNdx++)
810 {
811 const Symbol& symbol = m_shaderSpec.inputs[inputNdx];
812 const void* ptr = inputs[inputNdx];
813 const glu::DataType basicType = symbol.varType.getBasicType();
814 const int vecSize = glu::getDataTypeScalarSize(basicType);
815 const VkFormat format = getAttributeFormat(basicType);
816 int elementSize = 0;
817 int numAttrsToAdd = 1;
818
819 if (glu::isDataTypeDoubleOrDVec(basicType))
820 elementSize = sizeof(double);
821 if (glu::isDataTypeFloatOrVec(basicType))
822 elementSize = sizeof(float);
823 else if (glu::isDataTypeFloat16OrVec(basicType))
824 elementSize = sizeof(deUint16);
825 else if (glu::isDataTypeIntOrIVec(basicType))
826 elementSize = sizeof(int);
827 else if (glu::isDataTypeUintOrUVec(basicType))
828 elementSize = sizeof(deUint32);
829 else if (glu::isDataTypeMatrix(basicType))
830 {
831 int numRows = glu::getDataTypeMatrixNumRows(basicType);
832 int numCols = glu::getDataTypeMatrixNumColumns(basicType);
833
834 elementSize = numRows * numCols * (int)sizeof(float);
835 numAttrsToAdd = numCols;
836 }
837 else
838 DE_ASSERT(false);
839
840 // add attributes, in case of matrix every column is binded as an attribute
841 for (int attrNdx = 0; attrNdx < numAttrsToAdd; attrNdx++)
842 {
843 addAttribute((deUint32)m_vertexBindingDescriptions.size(), format, elementSize * vecSize, numValues, ptr);
844 }
845 }
846 }
847
clearRenderData(void)848 void FragmentOutExecutor::clearRenderData (void)
849 {
850 m_vertexBindingDescriptions.clear();
851 m_vertexAttributeDescriptions.clear();
852 m_vertexBuffers.clear();
853 m_vertexBufferAllocs.clear();
854 }
855
createEmptyDescriptorSetLayout(const DeviceInterface & vkd,VkDevice device)856 static Move<VkDescriptorSetLayout> createEmptyDescriptorSetLayout (const DeviceInterface& vkd, VkDevice device)
857 {
858 const VkDescriptorSetLayoutCreateInfo createInfo =
859 {
860 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
861 DE_NULL,
862 (VkDescriptorSetLayoutCreateFlags)0,
863 0u,
864 DE_NULL,
865 };
866 return createDescriptorSetLayout(vkd, device, &createInfo);
867 }
868
createEmptyDescriptorPool(const DeviceInterface & vkd,VkDevice device)869 static Move<VkDescriptorPool> createEmptyDescriptorPool (const DeviceInterface& vkd, VkDevice device)
870 {
871 const VkDescriptorPoolSize emptySize =
872 {
873 VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
874 1u,
875 };
876 const VkDescriptorPoolCreateInfo createInfo =
877 {
878 VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
879 DE_NULL,
880 (VkDescriptorPoolCreateFlags)VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
881 1u,
882 1u,
883 &emptySize
884 };
885 return createDescriptorPool(vkd, device, &createInfo);
886 }
887
allocateSingleDescriptorSet(const DeviceInterface & vkd,VkDevice device,VkDescriptorPool pool,VkDescriptorSetLayout layout)888 static Move<VkDescriptorSet> allocateSingleDescriptorSet (const DeviceInterface& vkd, VkDevice device, VkDescriptorPool pool, VkDescriptorSetLayout layout)
889 {
890 const VkDescriptorSetAllocateInfo allocInfo =
891 {
892 VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
893 DE_NULL,
894 pool,
895 1u,
896 &layout,
897 };
898 return allocateDescriptorSet(vkd, device, &allocInfo);
899 }
900
// Executes the shader for 'numValues' inputs by rendering one 1px point per
// value, so each value maps to exactly one fragment invocation. Shader outputs
// are written to one color attachment per output location and then copied back
// into the client-provided 'outputs' arrays. 'extraResources' (set 1) is bound
// only when an extra-resources layout was supplied at construction.
void FragmentOutExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice										vkDevice			= m_context.getDevice();
	const DeviceInterface&								vk					= m_context.getDeviceInterface();
	const VkQueue										queue				= m_context.getUniversalQueue();
	const deUint32										queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&											memAlloc			= m_context.getDefaultAllocator();

	// Framebuffer is sized to hold one pixel per value: at most 128 wide,
	// with enough rows (rounded up) to cover all values.
	const deUint32										renderSizeX			= de::min(static_cast<deUint32>(128), (deUint32)numValues);
	const deUint32										renderSizeY			= ((deUint32)numValues / renderSizeX) + (((deUint32)numValues % renderSizeX != 0) ? 1u : 0u);
	const tcu::UVec2									renderSize			(renderSizeX, renderSizeY);
	std::vector<tcu::Vec2>								positions;

	const bool											useGeometryShader	= m_shaderType == glu::SHADERTYPE_GEOMETRY;

	std::vector<VkImageSp>								colorImages;
	std::vector<VkImageMemoryBarrier>					colorImagePreRenderBarriers;
	std::vector<VkImageMemoryBarrier>					colorImagePostRenderBarriers;
	std::vector<AllocationSp>							colorImageAllocs;
	std::vector<VkAttachmentDescription>				attachments;
	std::vector<VkClearValue>							attachmentClearValues;
	std::vector<VkImageViewSp>							colorImageViews;

	std::vector<VkPipelineColorBlendAttachmentState>	colorBlendAttachmentStates;
	std::vector<VkAttachmentReference>					colorAttachmentReferences;

	Move<VkRenderPass>									renderPass;
	Move<VkFramebuffer>									framebuffer;
	Move<VkPipelineLayout>								pipelineLayout;
	Move<VkPipeline>									graphicsPipeline;

	Move<VkShaderModule>								vertexShaderModule;
	Move<VkShaderModule>								geometryShaderModule;
	Move<VkShaderModule>								fragmentShaderModule;

	Move<VkCommandPool>									cmdPool;
	Move<VkCommandBuffer>								cmdBuffer;

	// Set 0 is an always-empty placeholder so extra resources can go in set 1.
	Unique<VkDescriptorSetLayout>						emptyDescriptorSetLayout	(createEmptyDescriptorSetLayout(vk, vkDevice));
	Unique<VkDescriptorPool>							emptyDescriptorPool			(createEmptyDescriptorPool(vk, vkDevice));
	Unique<VkDescriptorSet>								emptyDescriptorSet			(allocateSingleDescriptorSet(vk, vkDevice, *emptyDescriptorPool, *emptyDescriptorSetLayout));

	clearRenderData();

	// Compute positions - 1px points are used to drive fragment shading.
	positions = computeVertexPositions(numValues, renderSize.cast<int>());

	// Bind attributes
	addAttribute(0u, VK_FORMAT_R32G32_SFLOAT, sizeof(tcu::Vec2), (deUint32)positions.size(), &positions[0]);
	bindAttributes(numValues, inputs);

	// Create color images
	{
		const VkPipelineColorBlendAttachmentState	colorBlendAttachmentState	=
		{
			VK_FALSE,													// VkBool32			blendEnable;
			VK_BLEND_FACTOR_ONE,										// VkBlendFactor	srcColorBlendFactor;
			VK_BLEND_FACTOR_ZERO,										// VkBlendFactor	dstColorBlendFactor;
			VK_BLEND_OP_ADD,											// VkBlendOp		blendOpColor;
			VK_BLEND_FACTOR_ONE,										// VkBlendFactor	srcAlphaBlendFactor;
			VK_BLEND_FACTOR_ZERO,										// VkBlendFactor	destAlphaBlendFactor;
			VK_BLEND_OP_ADD,											// VkBlendOp		blendOpAlpha;
			(VK_COLOR_COMPONENT_R_BIT |
			 VK_COLOR_COMPONENT_G_BIT |
			 VK_COLOR_COMPONENT_B_BIT |
			 VK_COLOR_COMPONENT_A_BIT)									// VkColorComponentFlags	colorWriteMask;
		};

		// One color attachment per output location; format chosen from the
		// output's basic type so values round-trip without conversion loss.
		for (int outNdx = 0; outNdx < (int)m_outputLayout.locationSymbols.size(); ++outNdx)
		{
			const bool		isDouble	= glu::isDataTypeDoubleOrDVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isFloat		= isDataTypeFloatOrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isFloat16b	= glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isSigned	= isDataTypeIntOrIVec (m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const bool		isBool		= isDataTypeBoolOrBVec(m_shaderSpec.outputs[outNdx].varType.getBasicType());
			const VkFormat	colorFormat	= (isDouble ? VK_FORMAT_R64G64B64A64_SFLOAT : (isFloat16b ? VK_FORMAT_R16G16B16A16_SFLOAT : (isFloat ? VK_FORMAT_R32G32B32A32_SFLOAT : (isSigned || isBool ? VK_FORMAT_R32G32B32A32_SINT : VK_FORMAT_R32G32B32A32_UINT))));

			{
				const VkFormatProperties formatProperties = getPhysicalDeviceFormatProperties(m_context.getInstanceInterface(), m_context.getPhysicalDevice(), colorFormat);
				if ((formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT) == 0)
					TCU_THROW(NotSupportedError, "Image format doesn't support COLOR_ATTACHMENT_BIT");
			}

			const VkImageCreateInfo	colorImageParams =
			{
				VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,									// VkStructureType				sType;
				DE_NULL,																// const void*					pNext;
				0u,																		// VkImageCreateFlags			flags;
				VK_IMAGE_TYPE_2D,														// VkImageType					imageType;
				colorFormat,															// VkFormat						format;
				{ renderSize.x(), renderSize.y(), 1u },									// VkExtent3D					extent;
				1u,																		// deUint32						mipLevels;
				1u,																		// deUint32						arraySize;
				VK_SAMPLE_COUNT_1_BIT,													// VkSampleCountFlagBits		samples;
				VK_IMAGE_TILING_OPTIMAL,												// VkImageTiling				tiling;
				VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT,	// VkImageUsageFlags			usage;
				VK_SHARING_MODE_EXCLUSIVE,												// VkSharingMode				sharingMode;
				1u,																		// deUint32						queueFamilyCount;
				&queueFamilyIndex,														// const deUint32*				pQueueFamilyIndices;
				VK_IMAGE_LAYOUT_UNDEFINED,												// VkImageLayout				initialLayout;
			};

			const VkAttachmentDescription colorAttachmentDescription =
			{
				0u,											// VkAttachmentDescriptorFlags	flags;
				colorFormat,								// VkFormat						format;
				VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits		samples;
				VK_ATTACHMENT_LOAD_OP_CLEAR,				// VkAttachmentLoadOp			loadOp;
				VK_ATTACHMENT_STORE_OP_STORE,				// VkAttachmentStoreOp			storeOp;
				VK_ATTACHMENT_LOAD_OP_DONT_CARE,			// VkAttachmentLoadOp			stencilLoadOp;
				VK_ATTACHMENT_STORE_OP_DONT_CARE,			// VkAttachmentStoreOp			stencilStoreOp;
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// VkImageLayout				initialLayout;
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// VkImageLayout				finalLayout;
			};

			Move<VkImage> colorImage = createImage(vk, vkDevice, &colorImageParams);
			colorImages.push_back(de::SharedPtr<Unique<VkImage> >(new Unique<VkImage>(colorImage)));
			attachmentClearValues.push_back(getDefaultClearColor());

			// Allocate and bind color image memory
			{
				de::MovePtr<Allocation> colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *((const VkImage*) colorImages.back().get())), MemoryRequirement::Any);
				VK_CHECK(vk.bindImageMemory(vkDevice, colorImages.back().get()->get(), colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
				colorImageAllocs.push_back(de::SharedPtr<Allocation>(colorImageAlloc.release()));

				attachments.push_back(colorAttachmentDescription);
				colorBlendAttachmentStates.push_back(colorBlendAttachmentState);

				const VkAttachmentReference colorAttachmentReference =
				{
					(deUint32) (colorImages.size() - 1),		// deUint32			attachment;
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL	// VkImageLayout	layout;
				};

				colorAttachmentReferences.push_back(colorAttachmentReference);
			}

			// Create color attachment view
			{
				const VkImageViewCreateInfo colorImageViewParams =
				{
					VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,		// VkStructureType			sType;
					DE_NULL,										// const void*				pNext;
					0u,												// VkImageViewCreateFlags	flags;
					colorImages.back().get()->get(),				// VkImage					image;
					VK_IMAGE_VIEW_TYPE_2D,							// VkImageViewType			viewType;
					colorFormat,									// VkFormat					format;
					{
						VK_COMPONENT_SWIZZLE_R,						// VkComponentSwizzle		r;
						VK_COMPONENT_SWIZZLE_G,						// VkComponentSwizzle		g;
						VK_COMPONENT_SWIZZLE_B,						// VkComponentSwizzle		b;
						VK_COMPONENT_SWIZZLE_A						// VkComponentSwizzle		a;
					},												// VkComponentMapping		components;
					{
						VK_IMAGE_ASPECT_COLOR_BIT,					// VkImageAspectFlags		aspectMask;
						0u,											// deUint32					baseMipLevel;
						1u,											// deUint32					mipLevels;
						0u,											// deUint32					baseArraySlice;
						1u											// deUint32					arraySize;
					}												// VkImageSubresourceRange	subresourceRange;
				};

				Move<VkImageView> colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
				colorImageViews.push_back(de::SharedPtr<Unique<VkImageView> >(new Unique<VkImageView>(colorImageView)));

				// UNDEFINED -> COLOR_ATTACHMENT_OPTIMAL before rendering.
				const VkImageMemoryBarrier	colorImagePreRenderBarrier =
				{
					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		// sType
					DE_NULL,									// pNext
					0u,											// srcAccessMask
					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
					 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),		// dstAccessMask
					VK_IMAGE_LAYOUT_UNDEFINED,					// oldLayout
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// newLayout
					VK_QUEUE_FAMILY_IGNORED,					// srcQueueFamilyIndex
					VK_QUEUE_FAMILY_IGNORED,					// dstQueueFamilyIndex
					colorImages.back().get()->get(),			// image
					{
						VK_IMAGE_ASPECT_COLOR_BIT,				// aspectMask
						0u,										// baseMipLevel
						1u,										// levelCount
						0u,										// baseArrayLayer
						1u,										// layerCount
					}											// subresourceRange
				};
				colorImagePreRenderBarriers.push_back(colorImagePreRenderBarrier);

				// COLOR_ATTACHMENT_OPTIMAL -> TRANSFER_SRC_OPTIMAL for readback.
				const VkImageMemoryBarrier	colorImagePostRenderBarrier =
				{
					VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,		// sType
					DE_NULL,									// pNext
					(VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
					 VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT),		// srcAccessMask
					VK_ACCESS_TRANSFER_READ_BIT,				// dstAccessMask
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,	// oldLayout
					VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,		// newLayout
					VK_QUEUE_FAMILY_IGNORED,					// srcQueueFamilyIndex
					VK_QUEUE_FAMILY_IGNORED,					// dstQueueFamilyIndex
					colorImages.back().get()->get(),			// image
					{
						VK_IMAGE_ASPECT_COLOR_BIT,				// aspectMask
						0u,										// baseMipLevel
						1u,										// levelCount
						0u,										// baseArrayLayer
						1u,										// layerCount
					}											// subresourceRange
				};
				colorImagePostRenderBarriers.push_back(colorImagePostRenderBarrier);
			}
		}
	}

	// Create render pass
	{
		const VkSubpassDescription subpassDescription =
		{
			0u,									// VkSubpassDescriptionFlags	flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS,	// VkPipelineBindPoint			pipelineBindPoint;
			0u,									// deUint32						inputCount;
			DE_NULL,							// const VkAttachmentReference*	pInputAttachments;
			(deUint32)colorImages.size(),		// deUint32						colorCount;
			&colorAttachmentReferences[0],		// const VkAttachmentReference*	colorAttachments;
			DE_NULL,							// const VkAttachmentReference*	resolveAttachments;
			DE_NULL,							// VkAttachmentReference		depthStencilAttachment;
			0u,									// deUint32						preserveCount;
			DE_NULL								// const VkAttachmentReference*	pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,	// VkStructureType					sType;
			DE_NULL,									// const void*						pNext;
			(VkRenderPassCreateFlags)0,					// VkRenderPassCreateFlags			flags;
			(deUint32)attachments.size(),				// deUint32							attachmentCount;
			&attachments[0],							// const VkAttachmentDescription*	pAttachments;
			1u,											// deUint32							subpassCount;
			&subpassDescription,						// const VkSubpassDescription*		pSubpasses;
			0u,											// deUint32							dependencyCount;
			DE_NULL										// const VkSubpassDependency*		pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		std::vector<VkImageView> views(colorImageViews.size());
		for (size_t i = 0; i < colorImageViews.size(); i++)
		{
			views[i] = colorImageViews[i].get()->get();
		}

		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,	// VkStructureType			sType;
			DE_NULL,									// const void*				pNext;
			0u,											// VkFramebufferCreateFlags	flags;
			*renderPass,								// VkRenderPass				renderPass;
			(deUint32)views.size(),						// deUint32					attachmentCount;
			&views[0],									// const VkImageView*		pAttachments;
			(deUint32)renderSize.x(),					// deUint32					width;
			(deUint32)renderSize.y(),					// deUint32					height;
			1u											// deUint32					layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create pipeline layout
	{
		// Set 0: empty placeholder; set 1: caller's extra resources (if any).
		const VkDescriptorSetLayout setLayouts[] =
		{
			*emptyDescriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,	// VkStructureType				sType;
			DE_NULL,										// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,					// VkPipelineLayoutCreateFlags	flags;
			(m_extraResourcesLayout != 0 ? 2u : 0u),		// deUint32						descriptorSetCount;
			setLayouts,										// const VkDescriptorSetLayout*	pSetLayouts;
			0u,												// deUint32						pushConstantRangeCount;
			DE_NULL											// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		vertexShaderModule		= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		fragmentShaderModule	= createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);

		if (useGeometryShader)
		{
			// Variant writing gl_PointSize only when the feature allows it.
			if (m_context.getDeviceFeatures().shaderTessellationAndGeometryPointSize)
				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom_point_size"), 0);
			else
				geometryShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("geom"), 0);
		}
	}

	// Create pipeline
	{
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType							sType;
			DE_NULL,													// const void*								pNext;
			(VkPipelineVertexInputStateCreateFlags)0,					// VkPipelineVertexInputStateCreateFlags	flags;
			(deUint32)m_vertexBindingDescriptions.size(),				// deUint32									bindingCount;
			&m_vertexBindingDescriptions[0],							// const VkVertexInputBindingDescription*	pVertexBindingDescriptions;
			(deUint32)m_vertexAttributeDescriptions.size(),				// deUint32									attributeCount;
			&m_vertexAttributeDescriptions[0],							// const VkVertexInputAttributeDescription*	pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport>	viewports	(1, makeViewport(renderSize));
		const std::vector<VkRect2D>		scissors	(1, makeRect2D(renderSize));

		const VkPipelineColorBlendStateCreateInfo colorBlendStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,	// VkStructureType								sType;
			DE_NULL,													// const void*									pNext;
			(VkPipelineColorBlendStateCreateFlags)0,					// VkPipelineColorBlendStateCreateFlags			flags;
			VK_FALSE,													// VkBool32										logicOpEnable;
			VK_LOGIC_OP_COPY,											// VkLogicOp									logicOp;
			(deUint32)colorBlendAttachmentStates.size(),				// deUint32										attachmentCount;
			&colorBlendAttachmentStates[0],								// const VkPipelineColorBlendAttachmentState*	pAttachments;
			{ 0.0f, 0.0f, 0.0f, 0.0f }									// float										blendConst[4];
		};

		graphicsPipeline = makeGraphicsPipeline(vk,									// const DeviceInterface&							vk
												vkDevice,							// const VkDevice									device
												*pipelineLayout,					// const VkPipelineLayout							pipelineLayout
												*vertexShaderModule,				// const VkShaderModule								vertexShaderModule
												DE_NULL,							// const VkShaderModule								tessellationControlShaderModule
												DE_NULL,							// const VkShaderModule								tessellationEvalShaderModule
												useGeometryShader ? *geometryShaderModule : DE_NULL,	// const VkShaderModule			geometryShaderModule
												*fragmentShaderModule,				// const VkShaderModule								fragmentShaderModule
												*renderPass,						// const VkRenderPass								renderPass
												viewports,							// const std::vector<VkViewport>&					viewports
												scissors,							// const std::vector<VkRect2D>&						scissors
												VK_PRIMITIVE_TOPOLOGY_POINT_LIST,	// const VkPrimitiveTopology						topology
												0u,									// const deUint32									subpass
												0u,									// const deUint32									patchControlPoints
												&vertexInputStateParams,			// const VkPipelineVertexInputStateCreateInfo*		vertexInputStateCreateInfo
												DE_NULL,							// const VkPipelineRasterizationStateCreateInfo*	rasterizationStateCreateInfo
												DE_NULL,							// const VkPipelineMultisampleStateCreateInfo*		multisampleStateCreateInfo
												DE_NULL,							// const VkPipelineDepthStencilStateCreateInfo*		depthStencilStateCreateInfo
												&colorBlendStateParams);			// const VkPipelineColorBlendStateCreateInfo*		colorBlendStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		// Transition all attachments to COLOR_ATTACHMENT_OPTIMAL before the pass.
		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0,
							  0, (const VkMemoryBarrier*)DE_NULL,
							  0, (const VkBufferMemoryBarrier*)DE_NULL,
							  (deUint32)colorImagePreRenderBarriers.size(), colorImagePreRenderBarriers.empty() ? DE_NULL : &colorImagePreRenderBarriers[0]);
		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), (deUint32)attachmentClearValues.size(), &attachmentClearValues[0]);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		if (m_extraResourcesLayout != 0)
		{
			DE_ASSERT(extraResources != 0);
			const VkDescriptorSet descriptorSets[] = { *emptyDescriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, DE_LENGTH_OF_ARRAY(descriptorSets), descriptorSets, 0u, DE_NULL);
		}
		else
			DE_ASSERT(extraResources == 0);

		const deUint32 numberOfVertexAttributes = (deUint32)m_vertexBuffers.size();

		std::vector<VkDeviceSize> offsets(numberOfVertexAttributes, 0);

		std::vector<VkBuffer> buffers(numberOfVertexAttributes);
		for (size_t i = 0; i < numberOfVertexAttributes; i++)
		{
			buffers[i] = m_vertexBuffers[i].get()->get();
		}

		vk.cmdBindVertexBuffers(*cmdBuffer, 0, numberOfVertexAttributes, &buffers[0], &offsets[0]);
		vk.cmdDraw(*cmdBuffer, (deUint32)positions.size(), 1u, 0u, 0u);

		endRenderPass(vk, *cmdBuffer);
		// Make rendered attachments readable as transfer sources for readback.
		vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0,
							  0, (const VkMemoryBarrier*)DE_NULL,
							  0, (const VkBufferMemoryBarrier*)DE_NULL,
							  (deUint32)colorImagePostRenderBarriers.size(), colorImagePostRenderBarriers.empty() ? DE_NULL : &colorImagePostRenderBarriers[0]);

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());

	// Read back result and output
	{
		const VkDeviceSize imageSizeBytes = (VkDeviceSize)(4 * sizeof(deUint32) * renderSize.x() * renderSize.y());
		const VkBufferCreateInfo readImageBufferParams =
		{
			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
			DE_NULL,								// const void*			pNext;
			0u,										// VkBufferCreateFlags	flags;
			imageSizeBytes,							// VkDeviceSize			size;
			VK_BUFFER_USAGE_TRANSFER_DST_BIT,		// VkBufferUsageFlags	usage;
			VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
			1u,										// deUint32				queueFamilyCount;
			&queueFamilyIndex,						// const deUint32*		pQueueFamilyIndices;
		};

		// constants for image copy
		Move<VkCommandPool> copyCmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

		const VkBufferImageCopy copyParams =
		{
			0u,								// VkDeviceSize			bufferOffset;
			(deUint32)renderSize.x(),		// deUint32				bufferRowLength;
			(deUint32)renderSize.y(),		// deUint32				bufferImageHeight;
			{
				VK_IMAGE_ASPECT_COLOR_BIT,	// VkImageAspect		aspect;
				0u,							// deUint32				mipLevel;
				0u,							// deUint32				arraySlice;
				1u,							// deUint32				arraySize;
			},								// VkImageSubresource	imageSubresource;
			{ 0u, 0u, 0u },					// VkOffset3D			imageOffset;
			{ renderSize.x(), renderSize.y(), 1u }	// VkExtent3D	imageExtent;
		};

		// Read back pixels.
		for (int outNdx = 0; outNdx < (int)m_shaderSpec.outputs.size(); ++outNdx)
		{
			const Symbol&	output		= m_shaderSpec.outputs[outNdx];
			const int		outSize		= output.varType.getScalarSize();
			const int		outVecSize	= glu::getDataTypeNumComponents(output.varType.getBasicType());
			const int		outNumLocs	= glu::getDataTypeNumLocations(output.varType.getBasicType());
			const int		outLocation	= de::lookup(m_outputLayout.locationMap, output.name);

			// Each output location maps to one color attachment; copy each one
			// to a host-visible buffer and scatter its pixels into 'outputs'.
			for (int locNdx = 0; locNdx < outNumLocs; ++locNdx)
			{
				tcu::TextureLevel				tmpBuf;
				const tcu::TextureFormat		format = getRenderbufferFormatForOutput(output.varType, false);
				const tcu::TextureFormat		readFormat (tcu::TextureFormat::RGBA, format.type);
				const Unique<VkBuffer>			readImageBuffer(createBuffer(vk, vkDevice, &readImageBufferParams));
				const de::UniquePtr<Allocation>	readImageBufferMemory(memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *readImageBuffer), MemoryRequirement::HostVisible));

				VK_CHECK(vk.bindBufferMemory(vkDevice, *readImageBuffer, readImageBufferMemory->getMemory(), readImageBufferMemory->getOffset()));

				// Copy image to buffer
				{

					Move<VkCommandBuffer> copyCmdBuffer = allocateCommandBuffer(vk, vkDevice, *copyCmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

					beginCommandBuffer(vk, *copyCmdBuffer);
					vk.cmdCopyImageToBuffer(*copyCmdBuffer, colorImages[outLocation + locNdx].get()->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *readImageBuffer, 1u, &copyParams);

					// Insert a barrier so data written by the transfer is available to the host
					{
						const VkBufferMemoryBarrier barrier =
						{
							VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
							DE_NULL,									// const void*		pNext;
							VK_ACCESS_TRANSFER_WRITE_BIT,				// VkAccessFlags	srcAccessMask;
							VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags	dstAccessMask;
							VK_QUEUE_FAMILY_IGNORED,					// uint32_t			srcQueueFamilyIndex;
							VK_QUEUE_FAMILY_IGNORED,					// uint32_t			dstQueueFamilyIndex;
							*readImageBuffer,							// VkBuffer			buffer;
							0,											// VkDeviceSize		offset;
							VK_WHOLE_SIZE,								// VkDeviceSize		size;
						};

						vk.cmdPipelineBarrier(*copyCmdBuffer, vk::VK_PIPELINE_STAGE_TRANSFER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
											  0, (const VkMemoryBarrier*)DE_NULL,
											  1, &barrier,
											  0, (const VkImageMemoryBarrier*)DE_NULL);
					}

					endCommandBuffer(vk, *copyCmdBuffer);

					submitCommandsAndWait(vk, vkDevice, queue, copyCmdBuffer.get());
				}

				invalidateAlloc(vk, vkDevice, *readImageBufferMemory);

				tmpBuf.setStorage(readFormat, renderSize.x(), renderSize.y());

				const tcu::TextureFormat		resultFormat(tcu::TextureFormat::RGBA, format.type);
				const tcu::ConstPixelBufferAccess resultAccess(resultFormat, renderSize.x(), renderSize.y(), 1, readImageBufferMemory->getHostPtr());

				tcu::copy(tmpBuf.getAccess(), resultAccess);

				// Pixel N holds the result for value N; attachments are always
				// 4-component, so only the first outVecSize components are kept.
				if (isOutput16Bit(static_cast<size_t>(outNdx)))
				{
					deUint16* dstPtrBase = static_cast<deUint16*>(outputs[outNdx]);
					if (outSize == 4 && outNumLocs == 1)
						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint16));
					else
					{
						for (int valNdx = 0; valNdx < numValues; valNdx++)
						{
							const deUint16* srcPtr = (const deUint16*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
							deUint16* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint16));
						}
					}
				}
				else
				{
					deUint32* dstPtrBase = static_cast<deUint32*>(outputs[outNdx]);
					if (outSize == 4 && outNumLocs == 1)
						deMemcpy(dstPtrBase, tmpBuf.getAccess().getDataPtr(), numValues * outVecSize * sizeof(deUint32));
					else
					{
						for (int valNdx = 0; valNdx < numValues; valNdx++)
						{
							const deUint32* srcPtr = (const deUint32*)tmpBuf.getAccess().getDataPtr() + valNdx * 4;
							deUint32* dstPtr = &dstPtrBase[outSize * valNdx + outVecSize * locNdx];
							deMemcpy(dstPtr, srcPtr, outVecSize * sizeof(deUint32));
						}
					}
				}
			}
		}
	}
}
1433
1434 // VertexShaderExecutor
1435
// Executes the shader code in the vertex stage; outputs are passed through a
// generated passthrough fragment shader and captured via color attachments.
class VertexShaderExecutor : public FragmentOutExecutor
{
public:
				VertexShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual		~VertexShaderExecutor	(void);

	// Generates the "vert" and "frag" programs for this executor.
	static void	generateSources			(const ShaderSpec& shaderSpec, SourceCollections& dst);
};
1444
// Delegates to FragmentOutExecutor with the vertex shader stage selected.
VertexShaderExecutor::VertexShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_VERTEX, shaderSpec, extraResourcesLayout)
{
}
1449
// Base class owns all resources; nothing to release here.
VertexShaderExecutor::~VertexShaderExecutor (void)
{
}
1453
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1454 void VertexShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1455 {
1456 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1457
1458 programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1459 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1460 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1461 }
1462
1463 // GeometryShaderExecutor
1464
// Executes the shader code in the geometry stage; a passthrough vertex shader
// feeds it and a passthrough fragment shader captures its outputs.
class GeometryShaderExecutor : public FragmentOutExecutor
{
public:
				GeometryShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual		~GeometryShaderExecutor	(void);

	// Generates "vert", "geom", "geom_point_size" and "frag" programs.
	static void	generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1474
GeometryShaderExecutor(Context & context,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)1475 GeometryShaderExecutor::GeometryShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
1476 : FragmentOutExecutor(context, glu::SHADERTYPE_GEOMETRY, shaderSpec, extraResourcesLayout)
1477 {
1478 const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();
1479
1480 if (!features.geometryShader)
1481 TCU_THROW(NotSupportedError, "Geometry shader type not supported by device");
1482 }
1483
// Base class owns all resources; nothing to release here.
GeometryShaderExecutor::~GeometryShaderExecutor (void)
{
}
1487
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1488 void GeometryShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1489 {
1490 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1491
1492 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1493
1494 programCollection.glslSources.add("geom") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", false)) << shaderSpec.buildOptions;
1495 programCollection.glslSources.add("geom_point_size") << glu::GeometrySource(generateGeometryShader(shaderSpec, "vtx_out_", "geom_out_", true)) << shaderSpec.buildOptions;
1496
1497 /* \todo [2015-09-18 rsipka] set useIntOutputs parameter if needed. */
1498 programCollection.glslSources.add("frag") << glu::FragmentSource(generatePassthroughFragmentShader(shaderSpec, false, outputLayout.locationMap, "geom_out_", "o_")) << shaderSpec.buildOptions;
1499
1500 }
1501
1502 // FragmentShaderExecutor
1503
// Executor that runs the shader-spec expression directly in the fragment
// shader, with a passthrough vertex shader providing the inputs.
class FragmentShaderExecutor : public FragmentOutExecutor
{
public:
						FragmentShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~FragmentShaderExecutor	(void);

	// Registers "vert" and "frag" GLSL sources.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

};
1513
// No additional device features are required beyond the base class.
FragmentShaderExecutor::FragmentShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: FragmentOutExecutor(context, glu::SHADERTYPE_FRAGMENT, shaderSpec, extraResourcesLayout)
{
}
1518
// Nothing to release beyond what FragmentOutExecutor owns.
FragmentShaderExecutor::~FragmentShaderExecutor (void)
{
}
1522
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)1523 void FragmentShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
1524 {
1525 const FragmentOutputLayout outputLayout (computeFragmentOutputLayout(shaderSpec.outputs));
1526
1527 programCollection.glslSources.add("vert") << glu::VertexSource(generatePassthroughVertexShader(shaderSpec, "a_", "vtx_out_")) << shaderSpec.buildOptions;
1528 /* \todo [2015-09-11 hegedusd] set useIntOutputs parameter if needed. */
1529 programCollection.glslSources.add("frag") << glu::FragmentSource(generateFragmentShader(shaderSpec, false, outputLayout.locationMap, "vtx_out_", "o_")) << shaderSpec.buildOptions;
1530 }
1531
1532 // Shared utilities for compute and tess executors
1533
getVecStd430ByteAlignment(glu::DataType type)1534 static deUint32 getVecStd430ByteAlignment (glu::DataType type)
1535 {
1536 deUint32 baseSize;
1537
1538 switch (glu::getDataTypeScalarType(type))
1539 {
1540 case glu::TYPE_FLOAT16: baseSize = 2u; break;
1541 case glu::TYPE_DOUBLE: baseSize = 8u; break;
1542 default: baseSize = 4u; break;
1543 }
1544
1545 switch (glu::getDataTypeScalarSize(type))
1546 {
1547 case 1: return baseSize;
1548 case 2: return baseSize * 2u;
1549 case 3: // fallthrough.
1550 case 4: return baseSize * 4u;
1551 default:
1552 DE_ASSERT(false);
1553 return 0u;
1554 }
1555 }
1556
// Base class for executors that pass values in and out through storage
// buffers (used by the compute and tessellation executors). Owns one input
// and one output SSBO whose contents are laid out with std430-style rules
// (see computeVarLayout()).
class BufferIoExecutor : public ShaderExecutor
{
public:
							BufferIoExecutor	(Context& context, const ShaderSpec& shaderSpec);
	virtual					~BufferIoExecutor	(void);

protected:
	// Descriptor-set bindings used by the generated shader sources.
	enum
	{
		INPUT_BUFFER_BINDING	= 0,
		OUTPUT_BUFFER_BINDING	= 1,
	};

	// Creates the input/output buffers and binds host-visible memory, sized for numValues elements.
	void					initBuffers			(int numValues);
	VkBuffer				getInputBuffer		(void) const { return *m_inputBuffer; }
	VkBuffer				getOutputBuffer		(void) const { return *m_outputBuffer; }
	// Per-element stride of the interleaved input/output layouts (0 if no variables).
	deUint32				getInputStride		(void) const { return getLayoutStride(m_inputLayout); }
	deUint32				getOutputStride		(void) const { return getLayoutStride(m_outputLayout); }

	// Copies host-side input values into the input buffer and flushes it for the device.
	void					uploadInputBuffer	(const void* const* inputPtrs, int numValues, bool packFloat16Bit);
	// Invalidates the output buffer and copies shader results back to host pointers.
	void					readOutputBuffer	(void* const* outputPtrs, int numValues);

	// Emits the "Inputs"/"Outputs" struct and SSBO block declarations into src.
	static void				declareBufferBlocks	(std::ostream& src, const ShaderSpec& spec);
	// Emits GLSL that loads inputs, runs spec.source and stores outputs for one invocation.
	static void				generateExecBufferIo(std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName);

protected:
	Move<VkBuffer>			m_inputBuffer;
	Move<VkBuffer>			m_outputBuffer;

private:
	// Buffer placement of a single variable: byte offset within an element,
	// stride between consecutive elements, and column stride for matrices.
	struct VarLayout
	{
		deUint32	offset;
		deUint32	stride;
		deUint32	matrixStride;

		VarLayout (void) : offset(0), stride(0), matrixStride(0) {}
	};

	static void				computeVarLayout	(const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout);
	// All entries of a layout share one stride; returns it (0 for an empty layout).
	static deUint32			getLayoutStride		(const vector<VarLayout>& layout);

	static void				copyToBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit);
	static void				copyFromBuffer		(const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr);

	de::MovePtr<Allocation>	m_inputAlloc;
	de::MovePtr<Allocation>	m_outputAlloc;

	vector<VarLayout>		m_inputLayout;
	vector<VarLayout>		m_outputLayout;
};
1608
// Computes the input/output buffer layouts up front; the buffers themselves
// are created later by initBuffers().
BufferIoExecutor::BufferIoExecutor (Context& context, const ShaderSpec& shaderSpec)
	: ShaderExecutor(context, shaderSpec)
{
	computeVarLayout(m_shaderSpec.inputs, &m_inputLayout);
	computeVarLayout(m_shaderSpec.outputs, &m_outputLayout);
}
1615
// Buffers and allocations are released by their Move/MovePtr wrappers.
BufferIoExecutor::~BufferIoExecutor (void)
{
}
1619
getLayoutStride(const vector<VarLayout> & layout)1620 inline deUint32 BufferIoExecutor::getLayoutStride (const vector<VarLayout>& layout)
1621 {
1622 return layout.empty() ? 0 : layout[0].stride;
1623 }
1624
// Computes a std430-like layout for the given symbols: each symbol gets a
// byte offset within one array element, and afterwards every entry is given
// the same stride (the aligned total element size) so values can be stored
// as an interleaved array of structs.
void BufferIoExecutor::computeVarLayout (const std::vector<Symbol>& symbols, std::vector<VarLayout>* layout)
{
	deUint32	maxAlignment	= 0;
	deUint32	curOffset		= 0;

	DE_ASSERT(layout != DE_NULL);
	DE_ASSERT(layout->empty());
	layout->resize(symbols.size());

	for (size_t varNdx = 0; varNdx < symbols.size(); varNdx++)
	{
		const Symbol&		symbol		= symbols[varNdx];
		const glu::DataType	basicType	= symbol.varType.getBasicType();
		VarLayout&			layoutEntry	= (*layout)[varNdx];

		if (glu::isDataTypeScalarOrVector(basicType))
		{
			// Component width: 8 bytes for doubles, 2 for 16-bit floats, 4 otherwise.
			const deUint32	alignment	= getVecStd430ByteAlignment(basicType);
			const deUint32	size		= (deUint32)glu::getDataTypeScalarSize(basicType) * (isDataTypeDoubleType(basicType) ? (int)(sizeof(deUint64)) : (isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)alignment);
			maxAlignment	= de::max(maxAlignment, alignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= 0;

			curOffset += size;
		}
		else if (glu::isDataTypeMatrix(basicType))
		{
			// Matrices are stored column by column; each column is a vector
			// aligned (and strided) like a std430 column vector.
			const int				numVecs			= glu::getDataTypeMatrixNumColumns(basicType);
			const glu::DataType		vecType			= glu::getDataTypeVector(glu::getDataTypeScalarType(basicType), glu::getDataTypeMatrixNumRows(basicType));
			const deUint32			vecAlignment	= getVecStd430ByteAlignment(vecType);

			curOffset		= (deUint32)deAlign32((int)curOffset, (int)vecAlignment);
			maxAlignment	= de::max(maxAlignment, vecAlignment);

			layoutEntry.offset			= curOffset;
			layoutEntry.matrixStride	= vecAlignment;

			curOffset += vecAlignment*numVecs;
		}
		else
			DE_ASSERT(false); // Aggregate types are not supported here.
	}

	{
		// Round the element size up to the largest member alignment and use
		// it as the shared array stride for every variable.
		const deUint32	totalSize	= (deUint32)deAlign32(curOffset, maxAlignment);

		for (vector<VarLayout>::iterator varIter = layout->begin(); varIter != layout->end(); ++varIter)
			varIter->stride = totalSize;
	}
}
1678
declareBufferBlocks(std::ostream & src,const ShaderSpec & spec)1679 void BufferIoExecutor::declareBufferBlocks (std::ostream& src, const ShaderSpec& spec)
1680 {
1681 // Input struct
1682 if (!spec.inputs.empty())
1683 {
1684 glu::StructType inputStruct("Inputs");
1685 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1686 inputStruct.addMember(symIter->name.c_str(), symIter->varType);
1687 src << glu::declare(&inputStruct) << ";\n";
1688 }
1689
1690 // Output struct
1691 {
1692 glu::StructType outputStruct("Outputs");
1693 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1694 outputStruct.addMember(symIter->name.c_str(), symIter->varType);
1695 src << glu::declare(&outputStruct) << ";\n";
1696 }
1697
1698 src << "\n";
1699
1700 if (!spec.inputs.empty())
1701 {
1702 src << "layout(set = 0, binding = " << int(INPUT_BUFFER_BINDING) << ", std430) buffer InBuffer\n"
1703 << "{\n"
1704 << " Inputs inputs[];\n"
1705 << "};\n";
1706 }
1707
1708 src << "layout(set = 0, binding = " << int(OUTPUT_BUFFER_BINDING) << ", std430) buffer OutBuffer\n"
1709 << "{\n"
1710 << " Outputs outputs[];\n"
1711 << "};\n"
1712 << "\n";
1713 }
1714
generateExecBufferIo(std::ostream & src,const ShaderSpec & spec,const char * invocationNdxName)1715 void BufferIoExecutor::generateExecBufferIo (std::ostream& src, const ShaderSpec& spec, const char* invocationNdxName)
1716 {
1717 std::string tname;
1718 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
1719 {
1720 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1721 if (f16BitTest)
1722 {
1723 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1724 }
1725 else
1726 {
1727 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1728 }
1729 src << "\t" << tname << " "<< symIter->name << " = " << tname << "(inputs[" << invocationNdxName << "]." << symIter->name << ");\n";
1730 }
1731
1732 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1733 {
1734 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1735 if (f16BitTest)
1736 {
1737 tname = glu::getDataTypeName(getDataTypeFloat16Scalars(symIter->varType.getBasicType()));
1738 }
1739 else
1740 {
1741 tname = glu::getDataTypeName(symIter->varType.getBasicType());
1742 }
1743 src << "\t" << tname << " " << symIter->name << ";\n";
1744 if (f16BitTest)
1745 {
1746 const char* ttname = glu::getDataTypeName(symIter->varType.getBasicType());
1747 src << "\t" << ttname << " " << "packed_" << symIter->name << ";\n";
1748 }
1749 }
1750
1751 src << "\n";
1752
1753 {
1754 std::istringstream opSrc (spec.source);
1755 std::string line;
1756
1757 while (std::getline(opSrc, line))
1758 src << "\t" << line << "\n";
1759 }
1760
1761 if (spec.packFloat16Bit)
1762 packFloat16Bit (src, spec.outputs);
1763
1764 src << "\n";
1765 for (vector<Symbol>::const_iterator symIter = spec.outputs.begin(); symIter != spec.outputs.end(); ++symIter)
1766 {
1767 const bool f16BitTest = spec.packFloat16Bit && glu::isDataTypeFloatType(symIter->varType.getBasicType());
1768 if(f16BitTest)
1769 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = packed_" << symIter->name << ";\n";
1770 else
1771 src << "\toutputs[" << invocationNdxName << "]." << symIter->name << " = " << symIter->name << ";\n";
1772 }
1773 }
1774
// Copies numValues tightly-packed host values into the strided buffer layout
// described by 'layout'. With packFloat16Bit set, each 32-bit float component
// is converted to float16 (round toward zero) before being written.
void BufferIoExecutor::copyToBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr, bool packFloat16Bit)
{
	if (varType.isBasicType())
	{
		const glu::DataType	basicType	= varType.getBasicType();
		const bool			isMatrix	= glu::isDataTypeMatrix(basicType);
		const int			scalarSize	= glu::getDataTypeScalarSize(basicType);
		const int			numVecs		= isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
		const int			numComps	= scalarSize / numVecs;	// components per (column) vector
		// Per-component byte width in the buffer.
		const int			size		= (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));

		for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
		{
			for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
			{
				// Source is tightly packed; destination honors offset/stride/matrixStride.
				const int		srcOffset	= size * (elemNdx * scalarSize + vecNdx * numComps);
				const int		dstOffset	= layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
				const deUint8*	srcPtr		= (const deUint8*)srcBasePtr + srcOffset;
				deUint8*		dstPtr		= (deUint8*)dstBasePtr + dstOffset;

				if (packFloat16Bit)
				{
					// Convert the float values to 16 bit and store in the lower 16 bits of 32 bit ints.
					// (The zero-initialized second array element supplies the upper
					// bytes whenever a full 32-bit slot is copied.)
					for (int cmpNdx=0; cmpNdx < numComps; ++cmpNdx)
					{
						deFloat16 f16vals[2] = {};
						f16vals[0] = deFloat32To16Round(((float*)srcPtr)[cmpNdx], DE_ROUNDINGMODE_TO_ZERO);
						deMemcpy(dstPtr + cmpNdx * size, &f16vals[0], size);
					}
				}
				else
				{
					deMemcpy(dstPtr, srcPtr, size * numComps);
				}
			}
		}
	}
	else
		throw tcu::InternalError("Unsupported type");
}
1815
copyFromBuffer(const glu::VarType & varType,const VarLayout & layout,int numValues,const void * srcBasePtr,void * dstBasePtr)1816 void BufferIoExecutor::copyFromBuffer (const glu::VarType& varType, const VarLayout& layout, int numValues, const void* srcBasePtr, void* dstBasePtr)
1817 {
1818 if (varType.isBasicType())
1819 {
1820 const glu::DataType basicType = varType.getBasicType();
1821 const bool isMatrix = glu::isDataTypeMatrix(basicType);
1822 const int scalarSize = glu::getDataTypeScalarSize(basicType);
1823 const int numVecs = isMatrix ? glu::getDataTypeMatrixNumColumns(basicType) : 1;
1824 const int numComps = scalarSize / numVecs;
1825
1826 for (int elemNdx = 0; elemNdx < numValues; elemNdx++)
1827 {
1828 for (int vecNdx = 0; vecNdx < numVecs; vecNdx++)
1829 {
1830 const int size = (glu::isDataTypeDoubleType(basicType) ? (int)sizeof(deUint64) : (glu::isDataTypeFloat16OrVec(basicType) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
1831 const int srcOffset = layout.offset + layout.stride * elemNdx + (isMatrix ? layout.matrixStride * vecNdx : 0);
1832 const int dstOffset = size * (elemNdx * scalarSize + vecNdx * numComps);
1833 const deUint8* srcPtr = (const deUint8*)srcBasePtr + srcOffset;
1834 deUint8* dstPtr = (deUint8*)dstBasePtr + dstOffset;
1835
1836 deMemcpy(dstPtr, srcPtr, size * numComps);
1837 }
1838 }
1839 }
1840 else
1841 throw tcu::InternalError("Unsupported type");
1842 }
1843
uploadInputBuffer(const void * const * inputPtrs,int numValues,bool packFloat16Bit)1844 void BufferIoExecutor::uploadInputBuffer (const void* const* inputPtrs, int numValues, bool packFloat16Bit)
1845 {
1846 const VkDevice vkDevice = m_context.getDevice();
1847 const DeviceInterface& vk = m_context.getDeviceInterface();
1848
1849 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1850 const int inputBufferSize = inputStride * numValues;
1851
1852 if (inputBufferSize == 0)
1853 return; // No inputs
1854
1855 DE_ASSERT(m_shaderSpec.inputs.size() == m_inputLayout.size());
1856 for (size_t inputNdx = 0; inputNdx < m_shaderSpec.inputs.size(); ++inputNdx)
1857 {
1858 const glu::VarType& varType = m_shaderSpec.inputs[inputNdx].varType;
1859 const VarLayout& layout = m_inputLayout[inputNdx];
1860
1861 copyToBuffer(varType, layout, numValues, inputPtrs[inputNdx], m_inputAlloc->getHostPtr(), packFloat16Bit);
1862 }
1863
1864 flushAlloc(vk, vkDevice, *m_inputAlloc);
1865 }
1866
readOutputBuffer(void * const * outputPtrs,int numValues)1867 void BufferIoExecutor::readOutputBuffer (void* const* outputPtrs, int numValues)
1868 {
1869 const VkDevice vkDevice = m_context.getDevice();
1870 const DeviceInterface& vk = m_context.getDeviceInterface();
1871
1872 DE_ASSERT(numValues > 0); // At least some outputs are required.
1873
1874 invalidateAlloc(vk, vkDevice, *m_outputAlloc);
1875
1876 DE_ASSERT(m_shaderSpec.outputs.size() == m_outputLayout.size());
1877 for (size_t outputNdx = 0; outputNdx < m_shaderSpec.outputs.size(); ++outputNdx)
1878 {
1879 const glu::VarType& varType = m_shaderSpec.outputs[outputNdx].varType;
1880 const VarLayout& layout = m_outputLayout[outputNdx];
1881
1882 copyFromBuffer(varType, layout, numValues, m_outputAlloc->getHostPtr(), outputPtrs[outputNdx]);
1883 }
1884 }
1885
initBuffers(int numValues)1886 void BufferIoExecutor::initBuffers (int numValues)
1887 {
1888 const deUint32 inputStride = getLayoutStride(m_inputLayout);
1889 const deUint32 outputStride = getLayoutStride(m_outputLayout);
1890 // Avoid creating zero-sized buffer/memory
1891 const size_t inputBufferSize = de::max(numValues * inputStride, 1u);
1892 const size_t outputBufferSize = numValues * outputStride;
1893
1894 // Upload data to buffer
1895 const VkDevice vkDevice = m_context.getDevice();
1896 const DeviceInterface& vk = m_context.getDeviceInterface();
1897 const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
1898 Allocator& memAlloc = m_context.getDefaultAllocator();
1899
1900 const VkBufferCreateInfo inputBufferParams =
1901 {
1902 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1903 DE_NULL, // const void* pNext;
1904 0u, // VkBufferCreateFlags flags;
1905 inputBufferSize, // VkDeviceSize size;
1906 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1907 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1908 1u, // deUint32 queueFamilyCount;
1909 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1910 };
1911
1912 m_inputBuffer = createBuffer(vk, vkDevice, &inputBufferParams);
1913 m_inputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_inputBuffer), MemoryRequirement::HostVisible);
1914
1915 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_inputBuffer, m_inputAlloc->getMemory(), m_inputAlloc->getOffset()));
1916
1917 const VkBufferCreateInfo outputBufferParams =
1918 {
1919 VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, // VkStructureType sType;
1920 DE_NULL, // const void* pNext;
1921 0u, // VkBufferCreateFlags flags;
1922 outputBufferSize, // VkDeviceSize size;
1923 VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, // VkBufferUsageFlags usage;
1924 VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
1925 1u, // deUint32 queueFamilyCount;
1926 &queueFamilyIndex // const deUint32* pQueueFamilyIndices;
1927 };
1928
1929 m_outputBuffer = createBuffer(vk, vkDevice, &outputBufferParams);
1930 m_outputAlloc = memAlloc.allocate(getBufferMemoryRequirements(vk, vkDevice, *m_outputBuffer), MemoryRequirement::HostVisible);
1931
1932 VK_CHECK(vk.bindBufferMemory(vkDevice, *m_outputBuffer, m_outputAlloc->getMemory(), m_outputAlloc->getOffset()));
1933 }
1934
1935 // ComputeShaderExecutor
1936
// Executor that runs the shader-spec expression in a compute shader; inputs
// and outputs travel through the SSBOs managed by BufferIoExecutor.
class ComputeShaderExecutor : public BufferIoExecutor
{
public:
						ComputeShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~ComputeShaderExecutor	(void);

	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Executes the shader for numValues input/output values.
	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateComputeShader	(const ShaderSpec& spec);

private:
	// Descriptor set layout for extra resources, supplied (and owned) by the caller.
	const VkDescriptorSetLayout	m_extraResourcesLayout;
};
1953
// Just records the extra-resources layout; buffer layouts are computed by
// the BufferIoExecutor base constructor.
ComputeShaderExecutor::ComputeShaderExecutor(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
1959
// Nothing to release; m_extraResourcesLayout is not owned.
ComputeShaderExecutor::~ComputeShaderExecutor (void)
{
}
1963
getTypeSpirv(const glu::DataType type,const bool packFloat16Bit=false)1964 std::string getTypeSpirv(const glu::DataType type, const bool packFloat16Bit = false)
1965 {
1966 switch(type)
1967 {
1968 case glu::TYPE_FLOAT16:
1969 return "%f16";
1970 case glu::TYPE_FLOAT16_VEC2:
1971 return "%v2f16";
1972 case glu::TYPE_FLOAT16_VEC3:
1973 return "%v3f16";
1974 case glu::TYPE_FLOAT16_VEC4:
1975 return "%v4f16";
1976 case glu::TYPE_FLOAT:
1977 return packFloat16Bit ? "%u32" : "%f32"; // f16 values will be bitcast from ui32.
1978 case glu::TYPE_FLOAT_VEC2:
1979 return packFloat16Bit ? "%v2u32" : "%v2f32"; // f16 values will be bitcast from ui32.
1980 case glu::TYPE_FLOAT_VEC3:
1981 return packFloat16Bit ? "%v3u32" : "%v3f32"; // f16 values will be bitcast from ui32.
1982 case glu::TYPE_FLOAT_VEC4:
1983 return packFloat16Bit ? "%v4u32" : "%v4f32"; // f16 values will be bitcast from ui32.
1984 case glu::TYPE_INT:
1985 return "%i32";
1986 case glu::TYPE_INT_VEC2:
1987 return "%v2i32";
1988 case glu::TYPE_INT_VEC3:
1989 return "%v3i32";
1990 case glu::TYPE_INT_VEC4:
1991 return "%v4i32";
1992 case glu::TYPE_DOUBLE:
1993 return "%f64";
1994 case glu::TYPE_DOUBLE_VEC2:
1995 return "%v2f64";
1996 case glu::TYPE_DOUBLE_VEC3:
1997 return "%v3f64";
1998 case glu::TYPE_DOUBLE_VEC4:
1999 return "%v4f64";
2000 default:
2001 DE_ASSERT(0);
2002 return "";
2003 }
2004 }
2005
moveBitOperation(std::string variableName,const int operationNdx)2006 std::string moveBitOperation (std::string variableName, const int operationNdx)
2007 {
2008 std::ostringstream src;
2009 src << "\n"
2010 << "%operation_move_" << operationNdx << " = OpLoad %i32 " << variableName << "\n"
2011 << "%move1_" << operationNdx << " = OpShiftLeftLogical %i32 %operation_move_"<< operationNdx <<" %c_i32_1\n"
2012 << "OpStore " << variableName << " %move1_" << operationNdx << "\n";
2013 return src.str();
2014 }
2015
// Emits SPIR-V that applies the comparison instruction 'operation' (e.g.
// "OpFOrdEqual") to %in0_val/%in1_val and, per component for which it holds,
// adds the current %operation bit into %out0. 'outputType' is a SPIR-V type
// id such as "%i32" or "%v2i32"; generated ids are suffixed with operationNdx
// to keep them unique across instantiations.
std::string scalarComparison(const std::string operation, const int operationNdx, const glu::DataType type, const std::string& outputType, const int scalarSize)
{
	std::ostringstream	src;
	std::string			boolType;

	switch (type)
	{
		// Scalar case: branch on the comparison result and conditionally add
		// the operation bit to the scalar output.
		case glu::TYPE_FLOAT16:
		case glu::TYPE_FLOAT:
		case glu::TYPE_DOUBLE:
			src << "\n"
				<< "%operation_result_" << operationNdx << " = " << operation << " %bool %in0_val %in1_val\n"
				<< "OpSelectionMerge %IF_" << operationNdx << " None\n"
				<< "OpBranchConditional %operation_result_" << operationNdx << " %label_IF_" << operationNdx << " %IF_" << operationNdx << "\n"
				<< "%label_IF_" << operationNdx << " = OpLabel\n"
				<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n"
				<< "%out_val_" << operationNdx << " = OpLoad %i32 %out0\n"
				<< "%add_if_" << operationNdx << " = OpIAdd %i32 %out_val_" << operationNdx << " %operation_val_" << operationNdx << "\n"
				<< "OpStore %out0 %add_if_" << operationNdx << "\n"
				<< "OpBranch %IF_" << operationNdx << "\n"
				<< "%IF_" << operationNdx << " = OpLabel\n";
			return src.str();
		// Vector cases: select 1/0 per component and add branchlessly below.
		case glu::TYPE_FLOAT16_VEC2:
		case glu::TYPE_FLOAT_VEC2:
		case glu::TYPE_DOUBLE_VEC2:
			boolType = "%v2bool";
			break;
		case glu::TYPE_FLOAT16_VEC3:
		case glu::TYPE_FLOAT_VEC3:
		case glu::TYPE_DOUBLE_VEC3:
			boolType = "%v3bool";
			break;
		case glu::TYPE_FLOAT16_VEC4:
		case glu::TYPE_FLOAT_VEC4:
		case glu::TYPE_DOUBLE_VEC4:
			boolType = "%v4bool";
			break;
		default:
			DE_ASSERT(0);
			return "";
	}

	// Vector path: turn the boolean result into a 1/0 integer vector
	// (constants named after outputType without its leading '%'), multiply
	// by a vector splat of the operation bit and accumulate into %out0.
	src << "\n"
		<< "%operation_result_" << operationNdx << " = " << operation << " " << boolType << " %in0_val %in1_val\n"
		<< "%ivec_result_" << operationNdx << " = OpSelect " << outputType << " %operation_result_" << operationNdx << " %c_" << &outputType[1] << "_1 %c_" << &outputType[1] << "_0\n"
		<< "%operation_val_" << operationNdx << " = OpLoad %i32 %operation\n";

	src << "%operation_vec_" << operationNdx << " = OpCompositeConstruct " << outputType;
	for(int ndx = 0; ndx < scalarSize; ++ndx)
		src << " %operation_val_" << operationNdx;
	src << "\n";

	src << "%toAdd" << operationNdx << " = OpIMul "<< outputType << " %ivec_result_" << operationNdx << " %operation_vec_" << operationNdx <<"\n"
		<< "%out_val_" << operationNdx << " = OpLoad "<< outputType << " %out0\n"

		<< "%add_if_" << operationNdx << " = OpIAdd " << outputType << " %out_val_" << operationNdx << " %toAdd" << operationNdx << "\n"
		<< "OpStore %out0 %add_if_" << operationNdx << "\n";

	return src.str();
}
2076
generateSpirv(const ShaderSpec & spec,const bool are16Bit,const bool are64Bit,const bool isMediump)2077 std::string generateSpirv(const ShaderSpec& spec, const bool are16Bit, const bool are64Bit, const bool isMediump)
2078 {
2079 static const std::string COMPARE_OPERATIONS[] =
2080 {
2081 "OpFOrdEqual",
2082 "OpFOrdGreaterThan",
2083 "OpFOrdLessThan",
2084 "OpFOrdGreaterThanEqual",
2085 "OpFOrdLessThanEqual",
2086 "OpFUnordEqual",
2087 "OpFUnordGreaterThan",
2088 "OpFUnordLessThan",
2089 "OpFUnordGreaterThanEqual",
2090 "OpFUnordLessThanEqual"
2091 };
2092
2093 int moveBitNdx = 0;
2094 vector<std::string> inputTypes;
2095 vector<std::string> outputTypes;
2096 const std::string packType = spec.packFloat16Bit ? getTypeSpirv(getDataTypeFloat16Scalars(spec.inputs[0].varType.getBasicType())) : "";
2097
2098 vector<bool> floatResult;
2099 for (const auto& symbol : spec.outputs)
2100 floatResult.push_back(glu::isDataTypeFloatType(symbol.varType.getBasicType()));
2101
2102 const bool anyFloatResult = std::any_of(begin(floatResult), end(floatResult), [](bool b) { return b; });
2103
2104 vector<bool> packFloatRes;
2105 for (const auto& floatRes : floatResult)
2106 packFloatRes.push_back(floatRes && spec.packFloat16Bit);
2107
2108 const bool useF32Types = (!are16Bit && !are64Bit);
2109 const bool useF64Types = are64Bit;
2110 const bool useF16Types = (spec.packFloat16Bit || are16Bit);
2111
2112 for (const auto& symbol : spec.inputs)
2113 inputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2114
2115 for (const auto& symbol : spec.outputs)
2116 outputTypes.push_back(getTypeSpirv(symbol.varType.getBasicType(), spec.packFloat16Bit));
2117
2118 DE_ASSERT(!inputTypes.empty());
2119 DE_ASSERT(!outputTypes.empty());
2120
2121 // Assert input and output types match the expected operations.
2122 switch (spec.spirvCase)
2123 {
2124 case SPIRV_CASETYPE_COMPARE:
2125 case SPIRV_CASETYPE_FREM:
2126 DE_ASSERT(inputTypes.size() == 2);
2127 DE_ASSERT(outputTypes.size() == 1);
2128 break;
2129 case SPIRV_CASETYPE_MODFSTRUCT:
2130 case SPIRV_CASETYPE_FREXPSTRUCT:
2131 DE_ASSERT(inputTypes.size() == 1);
2132 DE_ASSERT(outputTypes.size() == 2);
2133 break;
2134 default:
2135 DE_ASSERT(false);
2136 break;
2137 }
2138
2139 std::ostringstream src;
2140 src << "; SPIR-V\n"
2141 "; Version: 1.0\n"
2142 "; Generator: Khronos Glslang Reference Front End; 4\n"
2143 "; Bound: 114\n"
2144 "; Schema: 0\n"
2145 "OpCapability Shader\n";
2146
2147 if (useF16Types)
2148 src << "OpCapability Float16\n";
2149
2150 if (are16Bit)
2151 src << "OpCapability StorageBuffer16BitAccess\n"
2152 "OpCapability UniformAndStorageBuffer16BitAccess\n";
2153
2154 if (useF64Types)
2155 src << "OpCapability Float64\n";
2156
2157 if (are16Bit)
2158 src << "OpExtension \"SPV_KHR_16bit_storage\"\n";
2159
2160 src << "%glslstd450 = OpExtInstImport \"GLSL.std.450\"\n"
2161 "OpMemoryModel Logical GLSL450\n"
2162 "OpEntryPoint GLCompute %BP_main \"main\" %BP_id3uNum %BP_id3uID\n"
2163 "OpExecutionMode %BP_main LocalSize 1 1 1\n"
2164 "OpDecorate %BP_id3uNum BuiltIn NumWorkgroups\n"
2165 "OpDecorate %BP_id3uID BuiltIn WorkgroupId\n";
2166
2167 // Input offsets and stride.
2168 {
2169 int offset = 0;
2170 int ndx = 0;
2171 int largest = 0;
2172 for (const auto& symbol : spec.inputs)
2173 {
2174 const int scalarSize = symbol.varType.getScalarSize();
2175 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2176 const int extraMemberBytes = (offset % memberSize);
2177
2178 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2179 src << "OpMemberDecorate %SSB0_IN "<< ndx <<" Offset " << offset << "\n";
2180 ++ndx;
2181
2182 if (memberSize > largest)
2183 largest = memberSize;
2184
2185 offset += memberSize;
2186 }
2187 DE_ASSERT(largest > 0);
2188 const int extraBytes = (offset % largest);
2189 const int stride = offset + (extraBytes == 0 ? 0 : (largest - extraBytes));
2190 src << "OpDecorate %up_SSB0_IN ArrayStride "<< stride << "\n";
2191 }
2192
2193 src << "OpMemberDecorate %ssboIN 0 Offset 0\n"
2194 "OpDecorate %ssboIN BufferBlock\n"
2195 "OpDecorate %ssbo_src DescriptorSet 0\n"
2196 "OpDecorate %ssbo_src Binding 0\n"
2197 "\n";
2198
2199 if (isMediump)
2200 {
2201 for (size_t i = 0; i < inputTypes.size(); ++i)
2202 {
2203 src <<
2204 "OpMemberDecorate %SSB0_IN " << i << " RelaxedPrecision\n"
2205 "OpDecorate %in" << i << " RelaxedPrecision\n"
2206 "OpDecorate %src_val_0_" << i << " RelaxedPrecision\n"
2207 "OpDecorate %in" << i << "_val RelaxedPrecision\n"
2208 ;
2209 }
2210
2211 if (anyFloatResult)
2212 {
2213 switch (spec.spirvCase)
2214 {
2215 case SPIRV_CASETYPE_FREM:
2216 src << "OpDecorate %frem_result RelaxedPrecision\n";
2217 break;
2218 case SPIRV_CASETYPE_MODFSTRUCT:
2219 src << "OpDecorate %modfstruct_result RelaxedPrecision\n";
2220 break;
2221 case SPIRV_CASETYPE_FREXPSTRUCT:
2222 src << "OpDecorate %frexpstruct_result RelaxedPrecision\n";
2223 break;
2224 default:
2225 DE_ASSERT(false);
2226 break;
2227 }
2228
2229 for (size_t i = 0; i < outputTypes.size(); ++i)
2230 {
2231 src << "OpMemberDecorate %SSB0_OUT " << i << " RelaxedPrecision\n";
2232 src << "OpDecorate %out_val_final_" << i << " RelaxedPrecision\n";
2233 src << "OpDecorate %out" << i << " RelaxedPrecision\n";
2234 }
2235 }
2236 }
2237
2238 // Output offsets and stride.
2239 {
2240 int offset = 0;
2241 int ndx = 0;
2242 int largest = 0;
2243 for (const auto& symbol : spec.outputs)
2244 {
2245 const int scalarSize = symbol.varType.getScalarSize();
2246 const int memberSize = (scalarSize + ((scalarSize == 3) ? 1 : 0)) * (isDataTypeDoubleType(symbol.varType.getBasicType()) ? (int)sizeof(deUint64) : (isDataTypeFloat16OrVec(symbol.varType.getBasicType()) ? (int)sizeof(deUint16) : (int)sizeof(deUint32)));
2247 const int extraMemberBytes = (offset % memberSize);
2248
2249 offset += ((extraMemberBytes == 0) ? 0 : (memberSize - extraMemberBytes));
2250 src << "OpMemberDecorate %SSB0_OUT " << ndx << " Offset " << offset << "\n";
2251 ++ndx;
2252
2253 if (memberSize > largest)
2254 largest = memberSize;
2255
2256 offset += memberSize;
2257 }
2258 DE_ASSERT(largest > 0);
2259 const int extraBytes = (offset % largest);
2260 const int stride = offset + ((extraBytes == 0) ? 0 : (largest - extraBytes));
2261 src << "OpDecorate %up_SSB0_OUT ArrayStride " << stride << "\n";
2262 }
2263
2264 src << "OpMemberDecorate %ssboOUT 0 Offset 0\n"
2265 "OpDecorate %ssboOUT BufferBlock\n"
2266 "OpDecorate %ssbo_dst DescriptorSet 0\n"
2267 "OpDecorate %ssbo_dst Binding 1\n"
2268 "\n"
2269 "%void = OpTypeVoid\n"
2270 "%bool = OpTypeBool\n"
2271 "%v2bool = OpTypeVector %bool 2\n"
2272 "%v3bool = OpTypeVector %bool 3\n"
2273 "%v4bool = OpTypeVector %bool 4\n"
2274 "%u32 = OpTypeInt 32 0\n";
2275
2276 if (useF32Types)
2277 src << "%f32 = OpTypeFloat 32\n"
2278 "%v2f32 = OpTypeVector %f32 2\n"
2279 "%v3f32 = OpTypeVector %f32 3\n"
2280 "%v4f32 = OpTypeVector %f32 4\n";
2281
2282 if (useF64Types)
2283 src << "%f64 = OpTypeFloat 64\n"
2284 "%v2f64 = OpTypeVector %f64 2\n"
2285 "%v3f64 = OpTypeVector %f64 3\n"
2286 "%v4f64 = OpTypeVector %f64 4\n";
2287
2288 if (useF16Types)
2289 src << "%f16 = OpTypeFloat 16\n"
2290 "%v2f16 = OpTypeVector %f16 2\n"
2291 "%v3f16 = OpTypeVector %f16 3\n"
2292 "%v4f16 = OpTypeVector %f16 4\n";
2293
2294 src << "%i32 = OpTypeInt 32 1\n"
2295 "%v2i32 = OpTypeVector %i32 2\n"
2296 "%v3i32 = OpTypeVector %i32 3\n"
2297 "%v4i32 = OpTypeVector %i32 4\n"
2298 "%v2u32 = OpTypeVector %u32 2\n"
2299 "%v3u32 = OpTypeVector %u32 3\n"
2300 "%v4u32 = OpTypeVector %u32 4\n"
2301 "\n"
2302 "%ip_u32 = OpTypePointer Input %u32\n"
2303 "%ip_v3u32 = OpTypePointer Input %v3u32\n"
2304 "%up_float = OpTypePointer Uniform " << inputTypes[0] << "\n"
2305 "\n"
2306 "%fp_operation = OpTypePointer Function %i32\n"
2307 "%voidf = OpTypeFunction %void\n"
2308 "%fp_u32 = OpTypePointer Function %u32\n"
2309 "%fp_it1 = OpTypePointer Function " << inputTypes[0] << "\n"
2310 ;
2311
2312 for (size_t i = 0; i < outputTypes.size(); ++i)
2313 {
2314 src << "%fp_out_" << i << " = OpTypePointer Function " << outputTypes[i] << "\n"
2315 << "%up_out_" << i << " = OpTypePointer Uniform " << outputTypes[i] << "\n";
2316 }
2317
2318 if (spec.packFloat16Bit)
2319 src << "%fp_f16 = OpTypePointer Function " << packType << "\n";
2320
2321 src << "%BP_id3uID = OpVariable %ip_v3u32 Input\n"
2322 "%BP_id3uNum = OpVariable %ip_v3u32 Input\n"
2323 "\n"
2324 "%c_u32_0 = OpConstant %u32 0\n"
2325 "%c_u32_1 = OpConstant %u32 1\n"
2326 "%c_u32_2 = OpConstant %u32 2\n"
2327 "%c_i32_0 = OpConstant %i32 0\n"
2328 "%c_i32_1 = OpConstant %i32 1\n"
2329 "\n";
2330
2331 if (useF32Types)
2332 src <<
2333 "%c_f32_0 = OpConstant %f32 0\n"
2334 "%c_f32_1 = OpConstant %f32 1\n"
2335 ;
2336
2337 if (useF16Types)
2338 src <<
2339 "%c_f16_0 = OpConstant %f16 0\n"
2340 "%c_f16_1 = OpConstant %f16 1\n"
2341 "%c_f16_minus1 = OpConstant %f16 -0x1p+0"
2342 ;
2343
2344 if (useF64Types)
2345 src <<
2346 "%c_f64_0 = OpConstant %f64 0\n"
2347 "%c_f64_1 = OpConstant %f64 1\n"
2348 ;
2349
2350 src << "\n"
2351 "%c_v2i32_0 = OpConstantComposite %v2i32 %c_i32_0 %c_i32_0\n"
2352 "%c_v2i32_1 = OpConstantComposite %v2i32 %c_i32_1 %c_i32_1\n"
2353 "%c_v3i32_0 = OpConstantComposite %v3i32 %c_i32_0 %c_i32_0 %c_i32_0\n"
2354 "%c_v3i32_1 = OpConstantComposite %v3i32 %c_i32_1 %c_i32_1 %c_i32_1\n"
2355 "%c_v4i32_0 = OpConstantComposite %v4i32 %c_i32_0 %c_i32_0 %c_i32_0 %c_i32_0\n"
2356 "%c_v4i32_1 = OpConstantComposite %v4i32 %c_i32_1 %c_i32_1 %c_i32_1 %c_i32_1\n"
2357 "\n";
2358
2359 if (useF32Types)
2360 src <<
2361 "%c_v2f32_0 = OpConstantComposite %v2f32 %c_f32_0 %c_f32_0\n"
2362 "%c_v2f32_1 = OpConstantComposite %v2f32 %c_f32_1 %c_f32_1\n"
2363 "%c_v3f32_0 = OpConstantComposite %v3f32 %c_f32_0 %c_f32_0 %c_f32_0\n"
2364 "%c_v3f32_1 = OpConstantComposite %v3f32 %c_f32_1 %c_f32_1 %c_f32_1\n"
2365 "%c_v4f32_0 = OpConstantComposite %v4f32 %c_f32_0 %c_f32_0 %c_f32_0 %c_f32_0\n"
2366 "%c_v4f32_1 = OpConstantComposite %v4f32 %c_f32_1 %c_f32_1 %c_f32_1 %c_f32_1\n"
2367 ;
2368
2369 if (useF16Types)
2370 src <<
2371 "%c_v2f16_0 = OpConstantComposite %v2f16 %c_f16_0 %c_f16_0\n"
2372 "%c_v2f16_1 = OpConstantComposite %v2f16 %c_f16_1 %c_f16_1\n"
2373 "%c_v3f16_0 = OpConstantComposite %v3f16 %c_f16_0 %c_f16_0 %c_f16_0\n"
2374 "%c_v3f16_1 = OpConstantComposite %v3f16 %c_f16_1 %c_f16_1 %c_f16_1\n"
2375 "%c_v4f16_0 = OpConstantComposite %v4f16 %c_f16_0 %c_f16_0 %c_f16_0 %c_f16_0\n"
2376 "%c_v4f16_1 = OpConstantComposite %v4f16 %c_f16_1 %c_f16_1 %c_f16_1 %c_f16_1\n"
2377 ;
2378
2379 if (useF64Types)
2380 src <<
2381 "%c_v2f64_0 = OpConstantComposite %v2f64 %c_f64_0 %c_f64_0\n"
2382 "%c_v2f64_1 = OpConstantComposite %v2f64 %c_f64_1 %c_f64_1\n"
2383 "%c_v3f64_0 = OpConstantComposite %v3f64 %c_f64_0 %c_f64_0 %c_f64_0\n"
2384 "%c_v3f64_1 = OpConstantComposite %v3f64 %c_f64_1 %c_f64_1 %c_f64_1\n"
2385 "%c_v4f64_0 = OpConstantComposite %v4f64 %c_f64_0 %c_f64_0 %c_f64_0 %c_f64_0\n"
2386 "%c_v4f64_1 = OpConstantComposite %v4f64 %c_f64_1 %c_f64_1 %c_f64_1 %c_f64_1\n"
2387 "\n";
2388
2389 // Input struct.
2390 {
2391 src << "%SSB0_IN = OpTypeStruct";
2392 for (const auto& t : inputTypes)
2393 src << " " << t;
2394 src << "\n";
2395 }
2396
2397 src <<
2398 "%up_SSB0_IN = OpTypeRuntimeArray %SSB0_IN\n"
2399 "%ssboIN = OpTypeStruct %up_SSB0_IN\n"
2400 "%up_ssboIN = OpTypePointer Uniform %ssboIN\n"
2401 "%ssbo_src = OpVariable %up_ssboIN Uniform\n"
2402 "\n";
2403
2404 // Output struct.
2405 {
2406 src << "%SSB0_OUT = OpTypeStruct";
2407 for (const auto& t : outputTypes)
2408 src << " " << t;
2409 src << "\n";
2410 }
2411
2412 std::string modfStructMemberType;
2413 std::string frexpStructFirstMemberType;
2414 if (spec.spirvCase == SPIRV_CASETYPE_MODFSTRUCT)
2415 {
2416 modfStructMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2417 src << "%modfstruct_ret_t = OpTypeStruct " << modfStructMemberType << " " << modfStructMemberType << "\n";
2418 }
2419 else if (spec.spirvCase == SPIRV_CASETYPE_FREXPSTRUCT)
2420 {
2421 frexpStructFirstMemberType = (packFloatRes[0] ? packType : outputTypes[0]);
2422 src << "%frexpstruct_ret_t = OpTypeStruct " << frexpStructFirstMemberType << " " << outputTypes[1] << "\n";
2423 }
2424
2425 src <<
2426 "%up_SSB0_OUT = OpTypeRuntimeArray %SSB0_OUT\n"
2427 "%ssboOUT = OpTypeStruct %up_SSB0_OUT\n"
2428 "%up_ssboOUT = OpTypePointer Uniform %ssboOUT\n"
2429 "%ssbo_dst = OpVariable %up_ssboOUT Uniform\n"
2430 "\n"
2431 "%BP_main = OpFunction %void None %voidf\n"
2432 "%BP_label = OpLabel\n"
2433 "%invocationNdx = OpVariable %fp_u32 Function\n";
2434
2435 // Note: here we are supposing all inputs have the same type.
2436 for (size_t i = 0; i < inputTypes.size(); ++i)
2437 src << "%in" << i << " = OpVariable " << (spec.packFloat16Bit ? "%fp_f16" : "%fp_it1") << " Function\n";
2438
2439 for (size_t i = 0; i < outputTypes.size(); ++i)
2440 src << "%out" << i << " = OpVariable " << (packFloatRes[i] ? std::string("%fp_f16") : std::string("%fp_out_") + de::toString(i)) << " Function\n";
2441
2442 src << "%operation = OpVariable %fp_operation Function\n"
2443 "%BP_id_0_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_0\n"
2444 "%BP_id_1_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_1\n"
2445 "%BP_id_2_ptr = OpAccessChain %ip_u32 %BP_id3uID %c_u32_2\n"
2446 "%BP_num_0_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_0\n"
2447 "%BP_num_1_ptr = OpAccessChain %ip_u32 %BP_id3uNum %c_u32_1\n"
2448 "%BP_id_0_val = OpLoad %u32 %BP_id_0_ptr\n"
2449 "%BP_id_1_val = OpLoad %u32 %BP_id_1_ptr\n"
2450 "%BP_id_2_val = OpLoad %u32 %BP_id_2_ptr\n"
2451 "%BP_num_0_val = OpLoad %u32 %BP_num_0_ptr\n"
2452 "%BP_num_1_val = OpLoad %u32 %BP_num_1_ptr\n"
2453 "\n"
2454 "%mul_1 = OpIMul %u32 %BP_num_0_val %BP_num_1_val\n"
2455 "%mul_2 = OpIMul %u32 %mul_1 %BP_id_2_val\n"
2456 "%mul_3 = OpIMul %u32 %BP_num_0_val %BP_id_1_val\n"
2457 "%add_1 = OpIAdd %u32 %mul_2 %mul_3\n"
2458 "%add_2 = OpIAdd %u32 %add_1 %BP_id_0_val\n"
2459 "OpStore %invocationNdx %add_2\n"
2460 "%invocationNdx_val = OpLoad %u32 %invocationNdx\n";
2461
2462 // Load input values.
2463 for (size_t inputNdx = 0; inputNdx < inputTypes.size(); ++inputNdx)
2464 {
2465 src << "\n"
2466 << "%src_ptr_0_" << inputNdx << " = OpAccessChain %up_float %ssbo_src %c_i32_0 %invocationNdx_val %c_i32_" << inputNdx << "\n"
2467 << "%src_val_0_" << inputNdx << " = OpLoad " << inputTypes[inputNdx] << " %src_ptr_0_" << inputNdx << "\n";
2468
2469 if (spec.packFloat16Bit)
2470 {
2471 if (spec.inputs[inputNdx].varType.getScalarSize() > 1)
2472 {
2473 // Extract the val<inputNdx> u32 input channels into individual f16 values.
2474 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2475 {
2476 src << "%src_val_0_" << inputNdx << "_" << i << " = OpCompositeExtract %u32 %src_val_0_" << inputNdx << " " << i << "\n"
2477 "%val_v2f16_0_" << inputNdx << "_" << i << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "_" << i << "\n"
2478 "%val_f16_0_" << inputNdx << "_" << i << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << "_" << i << " 0\n";
2479 }
2480
2481 // Construct the input vector.
2482 src << "%val_f16_0_" << inputNdx << " = OpCompositeConstruct " << packType;
2483 for (int i = 0; i < spec.inputs[inputNdx].varType.getScalarSize(); ++i)
2484 {
2485 src << " %val_f16_0_" << inputNdx << "_" << i;
2486 }
2487
2488 src << "\n";
2489 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2490 }
2491 else
2492 {
2493 src << "%val_v2f16_0_" << inputNdx << " = OpBitcast %v2f16 %src_val_0_" << inputNdx << "\n"
2494 "%val_f16_0_" << inputNdx << " = OpCompositeExtract %f16 %val_v2f16_0_" << inputNdx << " 0\n";
2495
2496 src << "OpStore %in" << inputNdx << " %val_f16_0_" << inputNdx << "\n";
2497 }
2498 }
2499 else
2500 src << "OpStore %in" << inputNdx << " %src_val_0_" << inputNdx << "\n";
2501
2502 src << "%in" << inputNdx << "_val = OpLoad " << (spec.packFloat16Bit ? packType : inputTypes[inputNdx]) << " %in" << inputNdx << "\n";
2503 }
2504
2505 src << "\n"
2506 "OpStore %operation %c_i32_1\n";
2507
2508 // Fill output values with dummy data.
2509 for (size_t i = 0; i < outputTypes.size(); ++i)
2510 src << "OpStore %out" << i << " %c_" << (packFloatRes[i] ? &packType[1] : &outputTypes[i][1]) << "_0\n";
2511
2512 src << "\n";
2513
2514 // Run operation.
2515 switch (spec.spirvCase)
2516 {
2517 case SPIRV_CASETYPE_COMPARE:
2518 for (int operationNdx = 0; operationNdx < DE_LENGTH_OF_ARRAY(COMPARE_OPERATIONS); ++operationNdx)
2519 {
2520 src << scalarComparison (COMPARE_OPERATIONS[operationNdx], operationNdx,
2521 spec.inputs[0].varType.getBasicType(),
2522 outputTypes[0],
2523 spec.outputs[0].varType.getScalarSize());
2524 src << moveBitOperation("%operation", moveBitNdx);
2525 ++moveBitNdx;
2526 }
2527 break;
2528 case SPIRV_CASETYPE_FREM:
2529 src << "%frem_result = OpFRem " << (packFloatRes[0] ? packType : outputTypes[0]) << " %in0_val %in1_val\n"
2530 << "OpStore %out0 %frem_result\n";
2531 break;
2532 case SPIRV_CASETYPE_MODFSTRUCT:
2533 src << "%modfstruct_result = OpExtInst %modfstruct_ret_t %glslstd450 ModfStruct %in0_val\n"
2534 << "%modfstruct_result_0 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 0\n"
2535 << "%modfstruct_result_1 = OpCompositeExtract " << modfStructMemberType << " %modfstruct_result 1\n"
2536 << "OpStore %out0 %modfstruct_result_0\n"
2537 << "OpStore %out1 %modfstruct_result_1\n";
2538 break;
2539 case SPIRV_CASETYPE_FREXPSTRUCT:
2540 src << "%frexpstruct_result = OpExtInst %frexpstruct_ret_t %glslstd450 FrexpStruct %in0_val\n"
2541 << "%frexpstruct_result_0 = OpCompositeExtract " << frexpStructFirstMemberType << " %frexpstruct_result 0\n"
2542 << "%frexpstruct_result_1 = OpCompositeExtract " << outputTypes[1] << " %frexpstruct_result 1\n"
2543 << "OpStore %out0 %frexpstruct_result_0\n"
2544 << "OpStore %out1 %frexpstruct_result_1\n";
2545 break;
2546 default:
2547 DE_ASSERT(false);
2548 break;
2549 }
2550
2551 for (size_t outputNdx = 0; outputNdx < outputTypes.size(); ++outputNdx)
2552 {
2553 src << "\n"
2554 "%out_val_final_" << outputNdx << " = OpLoad " << (packFloatRes[outputNdx] ? packType : outputTypes[outputNdx]) << " %out" << outputNdx << "\n"
2555 "%ssbo_dst_ptr_" << outputNdx << " = OpAccessChain %up_out_" << outputNdx << " %ssbo_dst %c_i32_0 %invocationNdx_val %c_i32_" << outputNdx << "\n";
2556
2557 if (packFloatRes[outputNdx])
2558 {
2559 if (spec.outputs[outputNdx].varType.getScalarSize() > 1)
2560 {
2561 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2562 {
2563 src << "%out_val_final_" << outputNdx << "_" << i << " = OpCompositeExtract %f16 %out_val_final_" << outputNdx << " " << i << "\n";
2564 src << "%out_composite_" << outputNdx << "_" << i << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << "_" << i << " %c_f16_minus1\n";
2565 src << "%u32_val_" << outputNdx << "_" << i << " = OpBitcast %u32 %out_composite_" << outputNdx << "_" << i << "\n";
2566 }
2567
2568 src << "%u32_final_val_" << outputNdx << " = OpCompositeConstruct " << outputTypes[outputNdx];
2569 for (int i = 0; i < spec.outputs[outputNdx].varType.getScalarSize(); ++i)
2570 src << " %u32_val_" << outputNdx << "_" << i;
2571 src << "\n";
2572 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %u32_final_val_" << outputNdx << "\n";
2573 }
2574 else
2575 {
2576 src <<
2577 "%out_composite_" << outputNdx << " = OpCompositeConstruct %v2f16 %out_val_final_" << outputNdx << " %c_f16_minus1\n"
2578 "%out_result_" << outputNdx << " = OpBitcast " << outputTypes[outputNdx] << " %out_composite_" << outputNdx << "\n"
2579 "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_result_" << outputNdx << "\n";
2580 }
2581 }
2582 else
2583 {
2584 src << "OpStore %ssbo_dst_ptr_" << outputNdx << " %out_val_final_" << outputNdx << "\n";
2585 }
2586 }
2587
2588 src << "\n"
2589 "OpReturn\n"
2590 "OpFunctionEnd\n";
2591
2592 return src.str();
2593 }
2594
2595
generateComputeShader(const ShaderSpec & spec)2596 std::string ComputeShaderExecutor::generateComputeShader (const ShaderSpec& spec)
2597 {
2598 if (spec.spirvCase != SPIRV_CASETYPE_NONE)
2599 {
2600 bool are16Bit = false;
2601 bool are64Bit = false;
2602 bool isMediump = false;
2603 for (vector<Symbol>::const_iterator symIter = spec.inputs.begin(); symIter != spec.inputs.end(); ++symIter)
2604 {
2605 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
2606 are16Bit = true;
2607
2608 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
2609 are64Bit = true;
2610
2611 if (symIter->varType.getPrecision() == glu::PRECISION_MEDIUMP)
2612 isMediump = true;
2613
2614 if (isMediump && are16Bit)
2615 break;
2616 }
2617
2618 return generateSpirv(spec, are16Bit, are64Bit, isMediump);
2619 }
2620 else
2621 {
2622 std::ostringstream src;
2623 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n";
2624
2625 if (!spec.globalDeclarations.empty())
2626 src << spec.globalDeclarations << "\n";
2627
2628 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2629 << "\n";
2630
2631 declareBufferBlocks(src, spec);
2632
2633 src << "void main (void)\n"
2634 << "{\n"
2635 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2636 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2637
2638 generateExecBufferIo(src, spec, "invocationNdx");
2639
2640 src << "}\n";
2641
2642 return src.str();
2643 }
2644 }
2645
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection)2646 void ComputeShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
2647 {
2648 if (shaderSpec.spirvCase != SPIRV_CASETYPE_NONE)
2649 programCollection.spirvAsmSources.add("compute") << SpirVAsmBuildOptions(programCollection.usedVulkanVersion, SPIRV_VERSION_1_3) << generateComputeShader(shaderSpec);
2650 else
2651 programCollection.glslSources.add("compute") << glu::ComputeSource(generateComputeShader(shaderSpec)) << shaderSpec.buildOptions;
2652 }
2653
// Runs the compute shader over numValues input tuples and reads the results back.
// inputs/outputs are arrays of per-symbol pointers; extraResources is an optional
// caller-provided descriptor set bound as set 1 (must match m_extraResourcesLayout).
void ComputeShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const VkDevice					vkDevice			= m_context.getDevice();
	const DeviceInterface&			vk					= m_context.getDeviceInterface();
	const VkQueue					queue				= m_context.getUniversalQueue();
	const deUint32					queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();

	DescriptorPoolBuilder			descriptorPoolBuilder;
	DescriptorSetLayoutBuilder		descriptorSetLayoutBuilder;

	Move<VkShaderModule>			computeShaderModule;
	Move<VkPipeline>				computePipeline;
	Move<VkPipelineLayout>			pipelineLayout;
	Move<VkCommandPool>				cmdPool;
	Move<VkDescriptorPool>			descriptorPool;
	Move<VkDescriptorSetLayout>		descriptorSetLayout;
	Move<VkDescriptorSet>			descriptorSet;
	// Set 0 holds the I/O buffers; set 1 (optional) holds the caller's extra resources.
	const deUint32					numDescriptorSets	= (m_extraResourcesLayout != 0) ? 2u : 1u;

	// An extra-resources layout and an extra-resources set must be provided together.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create input/output buffers sized for numValues invocations.
	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Descriptor set layout and pool: two storage-buffer bindings (output and input).
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
	descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	const VkDescriptorSetAllocateInfo allocInfo =
	{
		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
		DE_NULL,
		*descriptorPool,
		1u,
		&*descriptorSetLayout
	};

	descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);

	// Create pipeline layout
	{
		const VkDescriptorSetLayout descriptorSetLayouts[] =
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,		// VkStructureType				sType;
			DE_NULL,											// const void*					pNext;
			(VkPipelineLayoutCreateFlags)0,						// VkPipelineLayoutCreateFlags	flags;
			numDescriptorSets,									// deUint32						descriptorSetCount;
			descriptorSetLayouts,								// const VkDescriptorSetLayout*	pSetLayouts;
			0u,													// deUint32						pushConstantRangeCount;
			DE_NULL												// const VkPushConstantRange*	pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shaders
	{
		computeShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("compute"), 0);
	}

	// create pipeline
	{
		const VkPipelineShaderStageCreateInfo shaderStageParams[1] =
		{
			{
				VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
				DE_NULL,												// const void*						pNext;
				(VkPipelineShaderStageCreateFlags)0u,					// VkPipelineShaderStageCreateFlags	flags;
				VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagsBit			stage;
				*computeShaderModule,									// VkShaderModule					shader;
				"main",													// const char*						pName;
				DE_NULL													// const VkSpecializationInfo*		pSpecializationInfo;
			}
		};

		const VkComputePipelineCreateInfo computePipelineParams =
		{
			VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType					sType;
			DE_NULL,										// const void*						pNext;
			(VkPipelineCreateFlags)0,						// VkPipelineCreateFlags			flags;
			*shaderStageParams,								// VkPipelineShaderStageCreateInfo	cs;
			*pipelineLayout,								// VkPipelineLayout					layout;
			0u,												// VkPipeline						basePipelineHandle;
			0u,												// int32_t							basePipelineIndex;
		};

		computePipeline = createComputePipeline(vk, vkDevice, DE_NULL, &computePipelineParams);
	}

	// Dispatch in batches, one workgroup per value.
	// NOTE(review): this reads maxComputeWorkGroupSize[0] but the value is used as a
	// per-dispatch workgroup *count*; compare limits.maxComputeWorkGroupCount — confirm intent.
	const int		maxValuesPerInvocation	= m_context.getDeviceProperties().limits.maxComputeWorkGroupSize[0];
	int				curOffset				= 0;
	const deUint32	inputStride				= getInputStride();
	const deUint32	outputStride			= getOutputStride();

	while (curOffset < numValues)
	{
		Move<VkCommandBuffer>	cmdBuffer;
		const int				numToExec	= de::min(maxValuesPerInvocation, numValues-curOffset);

		// Update descriptors so the bound buffer ranges cover exactly this batch.
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer,				// VkBuffer			buffer;
				curOffset * outputStride,		// VkDeviceSize		offset;
				numToExec * outputStride		// VkDeviceSize		range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// A zero input stride means the shader has no inputs; skip that binding.
			if (inputStride)
			{
				const VkDescriptorBufferInfo inputDescriptorBufferInfo =
				{
					*m_inputBuffer,				// VkBuffer			buffer;
					curOffset * inputStride,	// VkDeviceSize		offset;
					numToExec * inputStride		// VkDeviceSize		range;
				};

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		// Record a fresh command buffer for this batch.
		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		beginCommandBuffer(vk, *cmdBuffer);
		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

		{
			const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		// One workgroup per value; the shader derives the invocation index from the workgroup ID.
		vk.cmdDispatch(*cmdBuffer, numToExec, 1, 1);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
				DE_NULL,									// const void*		pNext;
				VK_ACCESS_SHADER_WRITE_BIT,					// VkAccessFlags	srcAccessMask;
				VK_ACCESS_HOST_READ_BIT,					// VkAccessFlags	dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED,					// uint32_t			dstQueueFamilyIndex;
				*m_outputBuffer,							// VkBuffer			buffer;
				0,											// VkDeviceSize		offset;
				VK_WHOLE_SIZE,								// VkDeviceSize		size;
			};

			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);

		curOffset += numToExec;

		// Execute (blocks until the batch completes).
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
2843
2844 #ifndef CTS_USES_VULKANSC
2845 // MeshTaskShaderExecutor
2846
// Buffer-I/O executor that evaluates the shader expression in a mesh shader
// or, when useTask is set, in a task shader (with a passthrough mesh shader).
class MeshTaskShaderExecutor : public BufferIoExecutor
{
public:
						MeshTaskShaderExecutor	(Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual				~MeshTaskShaderExecutor	(void);

	// useTask: when true, emit a task shader doing the work plus a no-op mesh shader.
	static void			generateSources			(const ShaderSpec& shaderSpec, SourceCollections& programCollection, bool useTask);

	virtual void		execute					(int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	static std::string	generateMeshShader		(const ShaderSpec& spec, bool useTask);
	static std::string	generateTaskShader		(const ShaderSpec& spec);

private:
	// Optional layout for a caller-provided extra-resources set (set 1); may be DE_NULL.
	const VkDescriptorSetLayout	m_extraResourcesLayout;
};
2864
// Stores the optional extra-resources layout; all heavy setup happens in execute().
MeshTaskShaderExecutor::MeshTaskShaderExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor			(context, shaderSpec)
	, m_extraResourcesLayout	(extraResourcesLayout)
{
}
2870
// All resources are RAII-managed (Move<> handles in execute()); nothing to release here.
MeshTaskShaderExecutor::~MeshTaskShaderExecutor (void)
{
}
2874
generateMeshShader(const ShaderSpec & spec,bool useTask)2875 std::string MeshTaskShaderExecutor::generateMeshShader (const ShaderSpec& spec, bool useTask)
2876 {
2877 DE_ASSERT(spec.spirvCase == SPIRV_CASETYPE_NONE);
2878
2879 std::ostringstream src;
2880
2881 if (useTask)
2882 {
2883 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2884 << "#extension GL_EXT_mesh_shader : enable\n"
2885 << "layout(local_size_x=1, local_size_y=1, local_size_z=1) in;\n"
2886 << "layout(points) out;\n"
2887 << "layout(max_vertices=1, max_primitives=1) out;\n"
2888 << "\n"
2889 << "void main (void)\n"
2890 << "{\n"
2891 << " SetMeshOutputsEXT(0u, 0u);\n"
2892 << "}\n";
2893 }
2894 else
2895 {
2896 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2897 << "#extension GL_EXT_mesh_shader : enable\n";
2898
2899 if (!spec.globalDeclarations.empty())
2900 src << spec.globalDeclarations << "\n";
2901
2902 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2903 << "layout(points) out;\n"
2904 << "layout(max_vertices=1, max_primitives=1) out;\n"
2905 << "\n";
2906
2907 declareBufferBlocks(src, spec);
2908
2909 src << "void main (void)\n"
2910 << "{\n"
2911 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2912 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2913
2914 generateExecBufferIo(src, spec, "invocationNdx");
2915
2916 src << " SetMeshOutputsEXT(0u, 0u);\n"
2917 << "}\n";
2918 }
2919
2920 return src.str();
2921 }
2922
generateTaskShader(const ShaderSpec & spec)2923 std::string MeshTaskShaderExecutor::generateTaskShader (const ShaderSpec& spec)
2924 {
2925 std::ostringstream src;
2926
2927 src << glu::getGLSLVersionDeclaration(spec.glslVersion) << "\n"
2928 << "#extension GL_EXT_mesh_shader : enable\n";
2929
2930 if (!spec.globalDeclarations.empty())
2931 src << spec.globalDeclarations << "\n";
2932
2933 src << "layout(local_size_x = " << spec.localSizeX << ") in;\n"
2934 << "\n";
2935
2936 declareBufferBlocks(src, spec);
2937
2938 src << "void main (void)\n"
2939 << "{\n"
2940 << " uint invocationNdx = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z\n"
2941 << " + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n";
2942
2943 generateExecBufferIo(src, spec, "invocationNdx");
2944
2945 src << " EmitMeshTasksEXT(0u, 0u, 0u);\n"
2946 << "}\n";
2947
2948 return src.str();
2949 }
2950
generateSources(const ShaderSpec & shaderSpec,SourceCollections & programCollection,bool useTask)2951 void MeshTaskShaderExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection, bool useTask)
2952 {
2953 DE_ASSERT(shaderSpec.spirvCase == SPIRV_CASETYPE_NONE);
2954 programCollection.glslSources.add("mesh") << glu::MeshSource(generateMeshShader(shaderSpec, useTask)) << shaderSpec.buildOptions;
2955 if (useTask)
2956 programCollection.glslSources.add("task") << glu::TaskSource(generateTaskShader(shaderSpec)) << shaderSpec.buildOptions;
2957 }
2958
// Runs the mesh (or task) shader over numValues input tuples and reads results back.
// extraResources is an optional caller-provided descriptor set bound as set 1.
void MeshTaskShaderExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	const auto	vkDevice			= m_context.getDevice();
	const auto&	vk					= m_context.getDeviceInterface();
	const auto	queue				= m_context.getUniversalQueue();
	const auto	queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	const auto	bindPoint			= VK_PIPELINE_BIND_POINT_GRAPHICS;
	const auto&	binaries			= m_context.getBinaryCollection();
	// A "task" binary is only present when the expression runs in the task stage.
	const bool	useTask				= binaries.contains("task");
	const auto	shaderStage			= (useTask ? VK_SHADER_STAGE_TASK_BIT_EXT : VK_SHADER_STAGE_MESH_BIT_EXT);
	const auto	pipelineStage		= (useTask ? VK_PIPELINE_STAGE_TASK_SHADER_BIT_EXT : VK_PIPELINE_STAGE_MESH_SHADER_BIT_EXT);

	// An extra-resources layout and an extra-resources set must be provided together.
	DE_ASSERT((m_extraResourcesLayout != DE_NULL) == (extraResources != DE_NULL));

	// Create input and output buffers.
	initBuffers(numValues);

	// Setup input buffer & copy data
	// For spirv shaders using packed 16 bit float values as input, the floats are converted to 16 bit before
	// storing in the lower 16 bits of 32 bit integers in the uniform buffer and cast back to 16 bit floats in
	// the shader.
	uploadInputBuffer(inputs, numValues, m_shaderSpec.packFloat16Bit && (m_shaderSpec.spirvCase != SPIRV_CASETYPE_NONE));

	// Create command pool
	const auto cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Descriptor pool, set layout and set: two storage-buffer bindings (output and input).
	DescriptorPoolBuilder		descriptorPoolBuilder;
	DescriptorSetLayoutBuilder	descriptorSetLayoutBuilder;

	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, shaderStage);
	descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

	const auto descriptorSetLayout	= descriptorSetLayoutBuilder.build(vk, vkDevice);
	const auto descriptorPool		= descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
	const auto descriptorSet		= makeDescriptorSet(vk, vkDevice, descriptorPool.get(), descriptorSetLayout.get());

	// Create pipeline layout; set 1 is the optional extra-resources layout.
	std::vector<VkDescriptorSetLayout> setLayouts;
	setLayouts.push_back(descriptorSetLayout.get());
	if (m_extraResourcesLayout != DE_NULL)
		setLayouts.push_back(m_extraResourcesLayout);

	const auto pipelineLayout = makePipelineLayout(vk, vkDevice, static_cast<uint32_t>(setLayouts.size()), de::dataOrNull(setLayouts));

	// Create shaders
	const auto meshShaderModule	= createShaderModule(vk, vkDevice, binaries.get("mesh"));
	const auto taskShaderModule	= (useTask ? createShaderModule(vk, vkDevice, binaries.get("task")) : Move<VkShaderModule>());

	// Render pass and framebuffer: 1x1 with no attachments (nothing is rasterized).
	const auto fbExtent		= makeExtent2D(1u, 1u);
	const auto renderPass	= makeRenderPass(vk, vkDevice);
	const auto framebuffer	= makeFramebuffer(vk, vkDevice, renderPass.get(), 0u, nullptr, fbExtent.width, fbExtent.height);

	const std::vector<VkViewport>	viewports	(1u, makeViewport(fbExtent));
	const std::vector<VkRect2D>		scissors	(1u, makeRect2D(fbExtent));

	// Create pipeline.
	const auto meshPipeline = makeGraphicsPipeline(
		vk, vkDevice, pipelineLayout.get(),
		taskShaderModule.get(), meshShaderModule.get(), DE_NULL,
		renderPass.get(), viewports, scissors);

	// Draw in batches, one task/mesh workgroup per value.
	// NOTE(review): this reads maxMeshWorkGroupSize[0] but the value is used as a
	// per-draw workgroup *count*; compare maxMeshWorkGroupCount — confirm intent.
	const int		maxValuesPerInvocation	= m_context.getMeshShaderPropertiesEXT().maxMeshWorkGroupSize[0];
	const uint32_t	inputStride				= getInputStride();
	const uint32_t	outputStride			= getOutputStride();
	const auto		outputBufferBinding		= DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(OUTPUT_BUFFER_BINDING));
	const auto		inputBufferBinding		= DescriptorSetUpdateBuilder::Location::binding(static_cast<uint32_t>(INPUT_BUFFER_BINDING));
	int				curOffset				= 0;

	while (curOffset < numValues)
	{
		const auto remaining	= numValues - curOffset;
		const auto numToExec	= de::min(maxValuesPerInvocation, remaining);

		// Update descriptors so the bound buffer ranges cover exactly this batch.
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;

			const auto outputDescriptorBufferInfo = makeDescriptorBufferInfo(m_outputBuffer.get(), curOffset * outputStride, numToExec * outputStride);
			descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), outputBufferBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			// A zero input stride means the shader has no inputs; skip that binding.
			if (inputStride)
			{
				const auto inputDescriptorBufferInfo = makeDescriptorBufferInfo(m_inputBuffer.get(), curOffset * inputStride, numToExec * inputStride);
				descriptorSetUpdateBuilder.writeSingle(descriptorSet.get(), inputBufferBinding, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}

		std::vector<VkDescriptorSet> descriptorSets;
		descriptorSets.push_back(descriptorSet.get());
		if (extraResources != DE_NULL)
			descriptorSets.push_back(extraResources);

		// Barrier making shader writes to the output buffer visible to host reads.
		const auto bufferBarrier	= makeBufferMemoryBarrier(VK_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT, m_outputBuffer.get(), 0ull, VK_WHOLE_SIZE);
		const auto cmdBufferPtr		= allocateCommandBuffer(vk, vkDevice, cmdPool.get(), VK_COMMAND_BUFFER_LEVEL_PRIMARY);
		const auto cmdBuffer		= cmdBufferPtr.get();

		// Record command buffer, including pipeline barrier from output buffer to the host.
		beginCommandBuffer(vk, cmdBuffer);
		beginRenderPass(vk, cmdBuffer, renderPass.get(), framebuffer.get(), scissors.at(0u));
		vk.cmdBindPipeline(cmdBuffer, bindPoint, meshPipeline.get());
		vk.cmdBindDescriptorSets(cmdBuffer, bindPoint, pipelineLayout.get(), 0u, static_cast<uint32_t>(descriptorSets.size()), de::dataOrNull(descriptorSets), 0u, DE_NULL);
		// One workgroup per value; the shader derives the invocation index from the workgroup ID.
		vk.cmdDrawMeshTasksEXT(cmdBuffer, numToExec, 1u, 1u);
		endRenderPass(vk, cmdBuffer);
		cmdPipelineBufferMemoryBarrier(vk, cmdBuffer, pipelineStage, VK_PIPELINE_STAGE_HOST_BIT, &bufferBarrier);
		endCommandBuffer(vk, cmdBuffer);

		// Execute (blocks until the batch completes).
		submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer);

		curOffset += numToExec;
	}

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3080 #endif // CTS_USES_VULKANSC
3081
3082 // Tessellation utils
3083
// Builds the minimal passthrough vertex shader used by all tessellation
// executors: position is derived from gl_VertexIndex alone, so the draw
// needs no vertex inputs.
static std::string generateVertexShaderForTess (void)
{
	std::string src;

	src += "#version 450\n";
	src += "void main (void)\n{\n";
	src += " gl_Position = vec4(gl_VertexIndex/2, gl_VertexIndex%2, 0.0, 1.0);\n";
	src += "}\n";

	return src;
}
3094
// Common base for the tessellation-stage executors. It owns the extra
// resources layout handle and provides renderTess(), which performs a
// patch-list draw into a throw-away color target while the tessellation
// shaders write results to the I/O buffers managed by BufferIoExecutor.
class TessellationExecutor : public BufferIoExecutor
{
public:
	// Throws NotSupportedError in the constructor if the device lacks
	// the tessellationShader feature.
	TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~TessellationExecutor (void);

	// Draws vertexCount vertices as patches of patchControlPoints control
	// points. numValues sizes the input data and controls whether the input
	// SSBO binding is written; extraResources is an optional second
	// descriptor set (must be non-null iff the layout passed at construction
	// was non-null).
	void renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources);

private:
	// Layout for the caller-provided extra descriptor set; may be null.
	const VkDescriptorSetLayout m_extraResourcesLayout;
};
3106
// Constructor: verifies tessellation support up front so unsupported devices
// fail early with NotSupportedError instead of failing pipeline creation later.
TessellationExecutor::TessellationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: BufferIoExecutor (context, shaderSpec)
	, m_extraResourcesLayout (extraResourcesLayout)
{
	const VkPhysicalDeviceFeatures& features = context.getDeviceFeatures();

	// Core feature check: all subclasses need tessellation stages.
	if (!features.tessellationShader)
		TCU_THROW(NotSupportedError, "Tessellation shader is not supported by device");
}
3116
// Destructor: all Vulkan objects are held via RAII wrappers, nothing to do.
TessellationExecutor::~TessellationExecutor (void)
{
}
3120
// Performs one tessellated draw that executes the shader under test.
// A 100x100 RGBA8 color target is created purely to satisfy the render pass;
// the actual results are written by the tessellation shaders into
// m_outputBuffer (BufferIoExecutor) via the storage-buffer descriptors bound
// at set 0. The optional extraResources set, if present, is bound at set 1.
void TessellationExecutor::renderTess (deUint32 numValues, deUint32 vertexCount, deUint32 patchControlPoints, VkDescriptorSet extraResources)
{
	const size_t inputBufferSize = numValues * getInputStride();
	const VkDevice vkDevice = m_context.getDevice();
	const DeviceInterface& vk = m_context.getDeviceInterface();
	const VkQueue queue = m_context.getUniversalQueue();
	const deUint32 queueFamilyIndex = m_context.getUniversalQueueFamilyIndex();
	Allocator& memAlloc = m_context.getDefaultAllocator();

	const tcu::UVec2 renderSize (DEFAULT_RENDER_WIDTH, DEFAULT_RENDER_HEIGHT);

	Move<VkImage> colorImage;
	de::MovePtr<Allocation> colorImageAlloc;
	VkFormat colorFormat = VK_FORMAT_R8G8B8A8_UNORM;
	Move<VkImageView> colorImageView;

	Move<VkRenderPass> renderPass;
	Move<VkFramebuffer> framebuffer;
	Move<VkPipelineLayout> pipelineLayout;
	Move<VkPipeline> graphicsPipeline;

	Move<VkShaderModule> vertexShaderModule;
	Move<VkShaderModule> tessControlShaderModule;
	Move<VkShaderModule> tessEvalShaderModule;
	Move<VkShaderModule> fragmentShaderModule;

	Move<VkCommandPool> cmdPool;
	Move<VkCommandBuffer> cmdBuffer;

	Move<VkDescriptorPool> descriptorPool;
	Move<VkDescriptorSetLayout> descriptorSetLayout;
	Move<VkDescriptorSet> descriptorSet;
	// Set 0 is always the I/O buffers; set 1 is the caller's extra resources.
	const deUint32 numDescriptorSets = (m_extraResourcesLayout != 0) ? 2u : 1u;

	// Extra layout and extra set must be provided together.
	DE_ASSERT((m_extraResourcesLayout != 0) == (extraResources != 0));

	// Create color image
	{
		const VkImageCreateInfo colorImageParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			0u, // VkImageCreateFlags flags;
			VK_IMAGE_TYPE_2D, // VkImageType imageType;
			colorFormat, // VkFormat format;
			{ renderSize.x(), renderSize.y(), 1u }, // VkExtent3D extent;
			1u, // deUint32 mipLevels;
			1u, // deUint32 arraySize;
			VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
			VK_IMAGE_TILING_OPTIMAL, // VkImageTiling tiling;
			VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT, // VkImageUsageFlags usage;
			VK_SHARING_MODE_EXCLUSIVE, // VkSharingMode sharingMode;
			1u, // deUint32 queueFamilyCount;
			&queueFamilyIndex, // const deUint32* pQueueFamilyIndices;
			VK_IMAGE_LAYOUT_UNDEFINED // VkImageLayout initialLayout;
		};

		colorImage = createImage(vk, vkDevice, &colorImageParams);

		// Allocate and bind color image memory
		colorImageAlloc = memAlloc.allocate(getImageMemoryRequirements(vk, vkDevice, *colorImage), MemoryRequirement::Any);
		VK_CHECK(vk.bindImageMemory(vkDevice, *colorImage, colorImageAlloc->getMemory(), colorImageAlloc->getOffset()));
	}

	// Create color attachment view
	{
		const VkImageViewCreateInfo colorImageViewParams =
		{
			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			0u, // VkImageViewCreateFlags flags;
			*colorImage, // VkImage image;
			VK_IMAGE_VIEW_TYPE_2D, // VkImageViewType viewType;
			colorFormat, // VkFormat format;
			{
				VK_COMPONENT_SWIZZLE_R, // VkComponentSwizzle r;
				VK_COMPONENT_SWIZZLE_G, // VkComponentSwizzle g;
				VK_COMPONENT_SWIZZLE_B, // VkComponentSwizzle b;
				VK_COMPONENT_SWIZZLE_A // VkComponentSwizzle a;
			}, // VkComponentsMapping components;
			{
				VK_IMAGE_ASPECT_COLOR_BIT, // VkImageAspectFlags aspectMask;
				0u, // deUint32 baseMipLevel;
				1u, // deUint32 mipLevels;
				0u, // deUint32 baseArraylayer;
				1u // deUint32 layerCount;
			} // VkImageSubresourceRange subresourceRange;
		};

		colorImageView = createImageView(vk, vkDevice, &colorImageViewParams);
	}

	// Create render pass
	{
		const VkAttachmentDescription colorAttachmentDescription =
		{
			0u, // VkAttachmentDescriptorFlags flags;
			colorFormat, // VkFormat format;
			VK_SAMPLE_COUNT_1_BIT, // VkSampleCountFlagBits samples;
			VK_ATTACHMENT_LOAD_OP_CLEAR, // VkAttachmentLoadOp loadOp;
			VK_ATTACHMENT_STORE_OP_STORE, // VkAttachmentStoreOp storeOp;
			VK_ATTACHMENT_LOAD_OP_DONT_CARE, // VkAttachmentLoadOp stencilLoadOp;
			VK_ATTACHMENT_STORE_OP_DONT_CARE, // VkAttachmentStoreOp stencilStoreOp;
			VK_IMAGE_LAYOUT_UNDEFINED, // VkImageLayout initialLayout;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout finalLayout
		};

		const VkAttachmentDescription attachments[1] =
		{
			colorAttachmentDescription
		};

		const VkAttachmentReference colorAttachmentReference =
		{
			0u, // deUint32 attachment;
			VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL // VkImageLayout layout;
		};

		const VkSubpassDescription subpassDescription =
		{
			0u, // VkSubpassDescriptionFlags flags;
			VK_PIPELINE_BIND_POINT_GRAPHICS, // VkPipelineBindPoint pipelineBindPoint;
			0u, // deUint32 inputCount;
			DE_NULL, // const VkAttachmentReference* pInputAttachments;
			1u, // deUint32 colorCount;
			&colorAttachmentReference, // const VkAttachmentReference* pColorAttachments;
			DE_NULL, // const VkAttachmentReference* pResolveAttachments;
			DE_NULL, // VkAttachmentReference depthStencilAttachment;
			0u, // deUint32 preserveCount;
			DE_NULL // const VkAttachmentReference* pPreserveAttachments;
		};

		const VkRenderPassCreateInfo renderPassParams =
		{
			VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			0u, // VkRenderPassCreateFlags flags;
			1u, // deUint32 attachmentCount;
			attachments, // const VkAttachmentDescription* pAttachments;
			1u, // deUint32 subpassCount;
			&subpassDescription, // const VkSubpassDescription* pSubpasses;
			0u, // deUint32 dependencyCount;
			DE_NULL // const VkSubpassDependency* pDependencies;
		};

		renderPass = createRenderPass(vk, vkDevice, &renderPassParams);
	}

	// Create framebuffer
	{
		const VkFramebufferCreateInfo framebufferParams =
		{
			VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			0u, // VkFramebufferCreateFlags flags;
			*renderPass, // VkRenderPass renderPass;
			1u, // deUint32 attachmentCount;
			&*colorImageView, // const VkAttachmentBindInfo* pAttachments;
			(deUint32)renderSize.x(), // deUint32 width;
			(deUint32)renderSize.y(), // deUint32 height;
			1u // deUint32 layers;
		};

		framebuffer = createFramebuffer(vk, vkDevice, &framebufferParams);
	}

	// Create descriptors
	{
		DescriptorPoolBuilder descriptorPoolBuilder;
		DescriptorSetLayoutBuilder descriptorSetLayoutBuilder;

		// Two storage-buffer bindings are always declared (output + input),
		// even when no input data is present; the input binding is simply
		// left unwritten in that case.
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
		descriptorSetLayoutBuilder.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_ALL);
		descriptorPoolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);

		descriptorSetLayout = descriptorSetLayoutBuilder.build(vk, vkDevice);
		descriptorPool = descriptorPoolBuilder.build(vk, vkDevice, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		const VkDescriptorSetAllocateInfo allocInfo =
		{
			VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
			DE_NULL,
			*descriptorPool,
			1u,
			&*descriptorSetLayout
		};

		descriptorSet = allocateDescriptorSet(vk, vkDevice, &allocInfo);
		// Update descriptors
		{
			DescriptorSetUpdateBuilder descriptorSetUpdateBuilder;
			const VkDescriptorBufferInfo outputDescriptorBufferInfo =
			{
				*m_outputBuffer, // VkBuffer buffer;
				0u, // VkDeviceSize offset;
				VK_WHOLE_SIZE // VkDeviceSize range;
			};

			descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)OUTPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &outputDescriptorBufferInfo);

			VkDescriptorBufferInfo inputDescriptorBufferInfo =
			{
				0, // VkBuffer buffer;
				0u, // VkDeviceSize offset;
				VK_WHOLE_SIZE // VkDeviceSize range;
			};

			// Only bind the input buffer when the shader spec actually has inputs.
			if (inputBufferSize > 0)
			{
				inputDescriptorBufferInfo.buffer = *m_inputBuffer;

				descriptorSetUpdateBuilder.writeSingle(*descriptorSet, vk::DescriptorSetUpdateBuilder::Location::binding((deUint32)INPUT_BUFFER_BINDING), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputDescriptorBufferInfo);
			}

			descriptorSetUpdateBuilder.update(vk, vkDevice);
		}
	}

	// Create pipeline layout
	{
		// Second entry is only consumed when numDescriptorSets == 2.
		const VkDescriptorSetLayout descriptorSetLayouts[] =
		{
			*descriptorSetLayout,
			m_extraResourcesLayout
		};
		const VkPipelineLayoutCreateInfo pipelineLayoutParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			(VkPipelineLayoutCreateFlags)0, // VkPipelineLayoutCreateFlags flags;
			numDescriptorSets, // deUint32 descriptorSetCount;
			descriptorSetLayouts, // const VkDescriptorSetLayout* pSetLayouts;
			0u, // deUint32 pushConstantRangeCount;
			DE_NULL // const VkPushConstantRange* pPushConstantRanges;
		};

		pipelineLayout = createPipelineLayout(vk, vkDevice, &pipelineLayoutParams);
	}

	// Create shader modules
	{
		vertexShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("vert"), 0);
		tessControlShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_control"), 0);
		tessEvalShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("tess_eval"), 0);
		fragmentShaderModule = createShaderModule(vk, vkDevice, m_context.getBinaryCollection().get("frag"), 0);
	}

	// Create pipeline
	{
		// No vertex inputs: the vertex shader derives position from gl_VertexIndex.
		const VkPipelineVertexInputStateCreateInfo vertexInputStateParams =
		{
			VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
			DE_NULL, // const void* pNext;
			(VkPipelineVertexInputStateCreateFlags)0, // VkPipelineVertexInputStateCreateFlags flags;
			0u, // deUint32 bindingCount;
			DE_NULL, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
			0u, // deUint32 attributeCount;
			DE_NULL, // const VkVertexInputAttributeDescription* pvertexAttributeDescriptions;
		};

		const std::vector<VkViewport> viewports (1, makeViewport(renderSize));
		const std::vector<VkRect2D> scissors (1, makeRect2D(renderSize));

		graphicsPipeline = makeGraphicsPipeline(vk, // const DeviceInterface& vk
												vkDevice, // const VkDevice device
												*pipelineLayout, // const VkPipelineLayout pipelineLayout
												*vertexShaderModule, // const VkShaderModule vertexShaderModule
												*tessControlShaderModule, // const VkShaderModule tessellationControlShaderModule
												*tessEvalShaderModule, // const VkShaderModule tessellationEvalShaderModule
												DE_NULL, // const VkShaderModule geometryShaderModule
												*fragmentShaderModule, // const VkShaderModule fragmentShaderModule
												*renderPass, // const VkRenderPass renderPass
												viewports, // const std::vector<VkViewport>& viewports
												scissors, // const std::vector<VkRect2D>& scissors
												VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, // const VkPrimitiveTopology topology
												0u, // const deUint32 subpass
												patchControlPoints, // const deUint32 patchControlPoints
												&vertexInputStateParams); // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
	}

	// Create command pool
	cmdPool = createCommandPool(vk, vkDevice, VK_COMMAND_POOL_CREATE_TRANSIENT_BIT, queueFamilyIndex);

	// Create command buffer
	{
		const VkClearValue clearValue = getDefaultClearColor();

		cmdBuffer = allocateCommandBuffer(vk, vkDevice, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY);

		beginCommandBuffer(vk, *cmdBuffer);

		beginRenderPass(vk, *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, renderSize.x(), renderSize.y()), clearValue);

		vk.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *graphicsPipeline);

		{
			// extraResources may be VK_NULL_HANDLE; numDescriptorSets limits
			// how many entries of this array are actually bound.
			const VkDescriptorSet descriptorSets[] = { *descriptorSet, extraResources };
			vk.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, numDescriptorSets, descriptorSets, 0u, DE_NULL);
		}

		vk.cmdDraw(*cmdBuffer, vertexCount, 1, 0, 0);

		endRenderPass(vk, *cmdBuffer);

		// Insert a barrier so data written by the shader is available to the host
		{
			const VkBufferMemoryBarrier bufferBarrier =
			{
				VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, // VkStructureType sType;
				DE_NULL, // const void* pNext;
				VK_ACCESS_SHADER_WRITE_BIT, // VkAccessFlags srcAccessMask;
				VK_ACCESS_HOST_READ_BIT, // VkAccessFlags dstAccessMask;
				VK_QUEUE_FAMILY_IGNORED, // uint32_t srcQueueFamilyIndex;
				VK_QUEUE_FAMILY_IGNORED, // uint32_t dstQueueFamilyIndex;
				*m_outputBuffer, // VkBuffer buffer;
				0, // VkDeviceSize offset;
				VK_WHOLE_SIZE, // VkDeviceSize size;
			};

			// NOTE(review): srcStageMask is the tess-eval stage even when the
			// writes are performed in the tess-control shader (TessControlExecutor)
			// -- confirm this makes TCS stores available, or widen the mask.
			vk.cmdPipelineBarrier(*cmdBuffer, vk::VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, vk::VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0,
								  0, (const VkMemoryBarrier*)DE_NULL,
								  1, &bufferBarrier,
								  0, (const VkImageMemoryBarrier*)DE_NULL);
		}

		endCommandBuffer(vk, *cmdBuffer);
	}

	// Execute Draw
	submitCommandsAndWait(vk, vkDevice, queue, cmdBuffer.get());
}
3453
3454 // TessControlExecutor
3455
// Executes the shader spec inside a tessellation control shader. One 3-vertex
// patch is drawn per value and the generated TCS (layout(vertices = 1)) uses
// gl_PrimitiveID as the value index (see generateTessControlShader).
class TessControlExecutor : public TessellationExecutor
{
public:
	TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~TessControlExecutor (void);

	// Adds vert/tess_control/tess_eval/frag GLSL sources to the collection.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Uploads inputs, renders, and reads back numValues outputs.
	virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the TCS that performs the buffer I/O for the shader spec.
	static std::string generateTessControlShader (const ShaderSpec& shaderSpec);
};
3469
// Constructor: all work (including the tessellation feature check) happens
// in the TessellationExecutor base.
TessControlExecutor::TessControlExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor(context, shaderSpec, extraResourcesLayout)
{
}
3474
// Destructor: nothing beyond base-class cleanup.
TessControlExecutor::~TessControlExecutor (void)
{
}
3478
generateTessControlShader(const ShaderSpec & shaderSpec)3479 std::string TessControlExecutor::generateTessControlShader (const ShaderSpec& shaderSpec)
3480 {
3481 std::ostringstream src;
3482 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3483
3484 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3485 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3486
3487 if (!shaderSpec.globalDeclarations.empty())
3488 src << shaderSpec.globalDeclarations << "\n";
3489
3490 src << "\nlayout(vertices = 1) out;\n\n";
3491
3492 declareBufferBlocks(src, shaderSpec);
3493
3494 src << "void main (void)\n{\n";
3495
3496 for (int ndx = 0; ndx < 2; ndx++)
3497 src << "\tgl_TessLevelInner[" << ndx << "] = 1.0;\n";
3498
3499 for (int ndx = 0; ndx < 4; ndx++)
3500 src << "\tgl_TessLevelOuter[" << ndx << "] = 1.0;\n";
3501
3502 src << "\n"
3503 << "\thighp uint invocationId = uint(gl_PrimitiveID);\n";
3504
3505 generateExecBufferIo(src, shaderSpec, "invocationId");
3506
3507 src << "}\n";
3508
3509 return src.str();
3510 }
3511
// Produces a do-nothing "triangles" tessellation evaluation shader that just
// forwards the tessellation coordinate as the vertex position. The text is
// fixed, so it is returned as a single literal.
static std::string generateEmptyTessEvalShader ()
{
	return std::string(
		"#version 450\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"\n"
		"layout(triangles, ccw) in;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"\tgl_Position = vec4(gl_TessCoord.xy, 0.0, 1.0);\n"
		"}\n");
}
3527
// Registers the four GLSL stages for the tess-control executor: a passthrough
// vertex shader, the spec-executing TCS, an empty TES, and an empty fragment
// shader. The names match those looked up in renderTess().
void TessControlExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generateTessControlShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateEmptyTessEvalShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3535
// Runs the shader spec once per value: one 3-control-point patch is drawn per
// value, so the TCS (which runs once per patch) processes value gl_PrimitiveID.
void TessControlExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	// Control points per patch; vertexCount = patchSize * numValues yields
	// exactly numValues patches with PATCH_LIST topology.
	const deUint32 patchSize = 3;

	initBuffers(numValues);

	// Setup input buffer & copy data
	uploadInputBuffer(inputs, numValues, false);

	renderTess(numValues, patchSize * numValues, patchSize, extraResources);

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3550
3551 // TessEvaluationExecutor
3552
// Executes the shader spec inside a tessellation evaluation shader. Isoline
// tessellation produces two TES invocations per patch; the generated TES maps
// gl_PrimitiveID and gl_TessCoord.x to a value index (see generateTessEvalShader).
class TessEvaluationExecutor : public TessellationExecutor
{
public:
	TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout);
	virtual ~TessEvaluationExecutor (void);

	// Adds vert/tess_control/tess_eval/frag GLSL sources to the collection.
	static void generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection);

	// Uploads inputs, renders, and reads back numValues outputs.
	virtual void execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources);

protected:
	// Builds the TES that performs the buffer I/O for the shader spec.
	static std::string generateTessEvalShader (const ShaderSpec& shaderSpec);
};
3566
// Constructor: all work (including the tessellation feature check) happens
// in the TessellationExecutor base.
TessEvaluationExecutor::TessEvaluationExecutor (Context& context, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
	: TessellationExecutor (context, shaderSpec, extraResourcesLayout)
{
}
3571
// Destructor: nothing beyond base-class cleanup.
TessEvaluationExecutor::~TessEvaluationExecutor (void)
{
}
3575
// Produces a fixed passthrough tessellation control shader that emits one
// output vertex per patch and sets every tessellation level to 1.0. The text
// never varies, so it is returned as a single literal (the original built it
// with loops over the inner/outer level indices).
static std::string generatePassthroughTessControlShader (void)
{
	return std::string(
		"#version 450\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"\n"
		"layout(vertices = 1) out;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"\tgl_TessLevelInner[0] = 1.0;\n"
		"\tgl_TessLevelInner[1] = 1.0;\n"
		"\tgl_TessLevelOuter[0] = 1.0;\n"
		"\tgl_TessLevelOuter[1] = 1.0;\n"
		"\tgl_TessLevelOuter[2] = 1.0;\n"
		"\tgl_TessLevelOuter[3] = 1.0;\n"
		"}\n");
}
3597
generateTessEvalShader(const ShaderSpec & shaderSpec)3598 std::string TessEvaluationExecutor::generateTessEvalShader (const ShaderSpec& shaderSpec)
3599 {
3600 std::ostringstream src;
3601
3602 src << glu::getGLSLVersionDeclaration(shaderSpec.glslVersion) << "\n";
3603
3604 if (shaderSpec.glslVersion == glu::GLSL_VERSION_310_ES)
3605 src << "#extension GL_EXT_tessellation_shader : require\n\n";
3606
3607 if (!shaderSpec.globalDeclarations.empty())
3608 src << shaderSpec.globalDeclarations << "\n";
3609
3610 src << "\n";
3611
3612 src << "layout(isolines, equal_spacing) in;\n\n";
3613
3614 declareBufferBlocks(src, shaderSpec);
3615
3616 src << "void main (void)\n{\n"
3617 << "\tgl_Position = vec4(gl_TessCoord.x, 0.0, 0.0, 1.0);\n"
3618 << "\thighp uint invocationId = uint(gl_PrimitiveID)*2u + (gl_TessCoord.x > 0.5 ? 1u : 0u);\n";
3619
3620 generateExecBufferIo(src, shaderSpec, "invocationId");
3621
3622 src << "}\n";
3623
3624 return src.str();
3625 }
3626
// Registers the four GLSL stages for the tess-eval executor: a passthrough
// vertex shader, a passthrough TCS, the spec-executing TES, and an empty
// fragment shader. The names match those looked up in renderTess().
void TessEvaluationExecutor::generateSources (const ShaderSpec& shaderSpec, SourceCollections& programCollection)
{
	programCollection.glslSources.add("vert") << glu::VertexSource(generateVertexShaderForTess()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_control") << glu::TessellationControlSource(generatePassthroughTessControlShader()) << shaderSpec.buildOptions;
	programCollection.glslSources.add("tess_eval") << glu::TessellationEvaluationSource(generateTessEvalShader(shaderSpec)) << shaderSpec.buildOptions;
	programCollection.glslSources.add("frag") << glu::FragmentSource(generateEmptyFragmentSource()) << shaderSpec.buildOptions;
}
3634
// Runs the shader spec using the tessellation evaluation stage. Each patch
// yields two TES invocations (isolines), so buffers are padded up to an even
// value count; only the first numValues results are read back.
void TessEvaluationExecutor::execute (int numValues, const void* const* inputs, void* const* outputs, VkDescriptorSet extraResources)
{
	// Values processed per patch (two ends of each isoline).
	const int patchSize = 2;
	const int alignedValues = deAlign32(numValues, patchSize);

	// Initialize buffers with aligned value count to make room for padding
	initBuffers(alignedValues);

	// Setup input buffer & copy data
	uploadInputBuffer(inputs, numValues, false);

	renderTess((deUint32)alignedValues, (deUint32)alignedValues, (deUint32)patchSize, extraResources);

	// Read back data
	readOutputBuffer(outputs, numValues);
}
3651
3652 } // anonymous
3653
3654 // ShaderExecutor
3655
// Virtual destructor anchor for the executor hierarchy; no resources to free.
ShaderExecutor::~ShaderExecutor (void)
{
}
3659
areInputs16Bit(void) const3660 bool ShaderExecutor::areInputs16Bit (void) const
3661 {
3662 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3663 {
3664 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3665 return true;
3666 }
3667 return false;
3668 }
3669
areOutputs16Bit(void) const3670 bool ShaderExecutor::areOutputs16Bit (void) const
3671 {
3672 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3673 {
3674 if (glu::isDataTypeFloat16OrVec(symIter->varType.getBasicType()))
3675 return true;
3676 }
3677 return false;
3678 }
3679
isOutput16Bit(const size_t ndx) const3680 bool ShaderExecutor::isOutput16Bit (const size_t ndx) const
3681 {
3682 if (glu::isDataTypeFloat16OrVec(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3683 return true;
3684 return false;
3685 }
3686
areInputs64Bit(void) const3687 bool ShaderExecutor::areInputs64Bit (void) const
3688 {
3689 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.inputs.begin(); symIter != m_shaderSpec.inputs.end(); ++symIter)
3690 {
3691 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3692 return true;
3693 }
3694 return false;
3695 }
3696
areOutputs64Bit(void) const3697 bool ShaderExecutor::areOutputs64Bit (void) const
3698 {
3699 for (vector<Symbol>::const_iterator symIter = m_shaderSpec.outputs.begin(); symIter != m_shaderSpec.outputs.end(); ++symIter)
3700 {
3701 if (glu::isDataTypeDoubleType(symIter->varType.getBasicType()))
3702 return true;
3703 }
3704 return false;
3705 }
3706
isOutput64Bit(const size_t ndx) const3707 bool ShaderExecutor::isOutput64Bit (const size_t ndx) const
3708 {
3709 if (glu::isDataTypeDoubleType(m_shaderSpec.outputs[ndx].varType.getBasicType()))
3710 return true;
3711 return false;
3712 }
3713
3714 // Utilities
3715
// Dispatches program-source generation to the executor class matching the
// requested shader stage. Mesh/task executors are unavailable in Vulkan SC
// builds; an unhandled stage is an internal error.
void generateSources (glu::ShaderType shaderType, const ShaderSpec& shaderSpec, vk::SourceCollections& dst)
{
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX: VertexShaderExecutor::generateSources (shaderSpec, dst); break;
		case glu::SHADERTYPE_TESSELLATION_CONTROL: TessControlExecutor::generateSources (shaderSpec, dst); break;
		case glu::SHADERTYPE_TESSELLATION_EVALUATION: TessEvaluationExecutor::generateSources (shaderSpec, dst); break;
		case glu::SHADERTYPE_GEOMETRY: GeometryShaderExecutor::generateSources (shaderSpec, dst); break;
		case glu::SHADERTYPE_FRAGMENT: FragmentShaderExecutor::generateSources (shaderSpec, dst); break;
		case glu::SHADERTYPE_COMPUTE: ComputeShaderExecutor::generateSources (shaderSpec, dst); break;
#ifndef CTS_USES_VULKANSC
		case glu::SHADERTYPE_MESH: MeshTaskShaderExecutor::generateSources (shaderSpec, dst, false/*useTask*/); break;
		case glu::SHADERTYPE_TASK: MeshTaskShaderExecutor::generateSources (shaderSpec, dst, true/*useTask*/); break;
#endif // CTS_USES_VULKANSC
		default:
			TCU_THROW(InternalError, "Unsupported shader type");
	}
}
3734
createExecutor(Context & context,glu::ShaderType shaderType,const ShaderSpec & shaderSpec,VkDescriptorSetLayout extraResourcesLayout)3735 ShaderExecutor* createExecutor (Context& context, glu::ShaderType shaderType, const ShaderSpec& shaderSpec, VkDescriptorSetLayout extraResourcesLayout)
3736 {
3737 switch (shaderType)
3738 {
3739 case glu::SHADERTYPE_VERTEX: return new VertexShaderExecutor (context, shaderSpec, extraResourcesLayout);
3740 case glu::SHADERTYPE_TESSELLATION_CONTROL: return new TessControlExecutor (context, shaderSpec, extraResourcesLayout);
3741 case glu::SHADERTYPE_TESSELLATION_EVALUATION: return new TessEvaluationExecutor (context, shaderSpec, extraResourcesLayout);
3742 case glu::SHADERTYPE_GEOMETRY: return new GeometryShaderExecutor (context, shaderSpec, extraResourcesLayout);
3743 case glu::SHADERTYPE_FRAGMENT: return new FragmentShaderExecutor (context, shaderSpec, extraResourcesLayout);
3744 case glu::SHADERTYPE_COMPUTE: return new ComputeShaderExecutor (context, shaderSpec, extraResourcesLayout);
3745 #ifndef CTS_USES_VULKANSC
3746 case glu::SHADERTYPE_MESH: return new MeshTaskShaderExecutor (context, shaderSpec, extraResourcesLayout);
3747 case glu::SHADERTYPE_TASK: return new MeshTaskShaderExecutor (context, shaderSpec, extraResourcesLayout);
3748 #endif // CTS_USES_VULKANSC
3749 default:
3750 TCU_THROW(InternalError, "Unsupported shader type");
3751 }
3752 }
3753
executorSupported(glu::ShaderType shaderType)3754 bool executorSupported(glu::ShaderType shaderType)
3755 {
3756 switch (shaderType)
3757 {
3758 case glu::SHADERTYPE_VERTEX:
3759 case glu::SHADERTYPE_TESSELLATION_CONTROL:
3760 case glu::SHADERTYPE_TESSELLATION_EVALUATION:
3761 case glu::SHADERTYPE_GEOMETRY:
3762 case glu::SHADERTYPE_FRAGMENT:
3763 case glu::SHADERTYPE_COMPUTE:
3764 case glu::SHADERTYPE_MESH:
3765 case glu::SHADERTYPE_TASK:
3766 return true;
3767 default:
3768 return false;
3769 }
3770 }
3771
// Throws NotSupportedError unless the device supports everything an executor
// for the given stage needs: the stage itself, stores/atomics in that stage,
// and (for tess-eval) isoline tessellation under the portability subset.
// Under Vulkan SC no checks are performed here.
void checkSupportShader(Context& context, const glu::ShaderType shaderType)
{
#ifndef CTS_USES_VULKANSC
	// Stage support.
	switch (shaderType)
	{
		case glu::SHADERTYPE_TESSELLATION_CONTROL:
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:
			context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_TESSELLATION_SHADER);
			break;

		case glu::SHADERTYPE_GEOMETRY:
			context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_GEOMETRY_SHADER);
			break;

		case glu::SHADERTYPE_TASK:
		case glu::SHADERTYPE_MESH:
		{
			// Mesh and task both need the extension; task additionally needs
			// the taskShader feature bit.
			context.requireDeviceFunctionality("VK_EXT_mesh_shader");

			if (shaderType == glu::SHADERTYPE_TASK)
			{
				const auto& features = context.getMeshShaderFeaturesEXT();
				if (!features.taskShader)
					TCU_THROW(NotSupportedError, "taskShader not supported");
			}
		}
		break;

		default:
			break;
	}

	// Stores and atomic operation support.
	// Executors write results via SSBOs, so the stage must allow stores.
	switch (shaderType)
	{
		case glu::SHADERTYPE_VERTEX:
		case glu::SHADERTYPE_TESSELLATION_CONTROL:
		case glu::SHADERTYPE_TESSELLATION_EVALUATION:
		case glu::SHADERTYPE_GEOMETRY:
		case glu::SHADERTYPE_TASK:
		case glu::SHADERTYPE_MESH:
			context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_VERTEX_PIPELINE_STORES_AND_ATOMICS);
			break;
		case glu::SHADERTYPE_FRAGMENT:
			context.requireDeviceCoreFeature(DEVICE_CORE_FEATURE_FRAGMENT_STORES_AND_ATOMICS);
			break;
		case glu::SHADERTYPE_COMPUTE:
			// Compute always supports storage-buffer stores.
			break;
		default:
			DE_FATAL("Unsupported shader type");
			break;
	}

	// TessEvaluationExecutor uses isoline tessellation, which is optional
	// under VK_KHR_portability_subset.
	if (shaderType == glu::SHADERTYPE_TESSELLATION_EVALUATION &&
		context.isDeviceFunctionalitySupported("VK_KHR_portability_subset") &&
		!context.getPortabilitySubsetFeatures().tessellationIsolines)
	{
		TCU_THROW(NotSupportedError, "VK_KHR_portability_subset: Tessellation iso lines are not supported by this implementation");
	}
#else
	DE_UNREF(context);
	DE_UNREF(shaderType);
#endif // CTS_USES_VULKANSC
}
3837
3838
3839 } // shaderexecutor
3840 } // vkt
3841